support for bibtex token markup;
authorwenzelm
Sat, 04 Oct 2014 12:19:26 +0200
changeset 58529 cd4439d8799c
parent 58528 7d6b8f8893e8
child 58530 7ee248f19ca9
support for bibtex token markup; more robust ML token marker: no_context; tuned signature;
src/Pure/Tools/bibtex.scala
src/Tools/jEdit/lib/Tools/jedit
src/Tools/jEdit/src/bibtex_token_markup.scala
src/Tools/jEdit/src/isabelle.scala
src/Tools/jEdit/src/isabelle_sidekick.scala
src/Tools/jEdit/src/token_markup.scala
--- a/src/Pure/Tools/bibtex.scala	Fri Oct 03 23:33:47 2014 +0200
+++ b/src/Pure/Tools/bibtex.scala	Sat Oct 04 12:19:26 2014 +0200
@@ -16,7 +16,7 @@
 {
   /** content **/
 
-  val months = List(
+  private val months = List(
     "jan",
     "feb",
     "mar",
@@ -29,16 +29,22 @@
     "oct",
     "nov",
     "dec")
+  def is_month(s: String): Boolean = months.contains(s.toLowerCase)
 
-  val commands = List("preamble", "string")
+  private val commands = List("preamble", "string")
+  def is_command(s: String): Boolean = commands.contains(s.toLowerCase)
 
   sealed case class Entry(
     kind: String,
     required: List[String],
     optional_crossref: List[String],
-    optional: List[String])
+    optional_other: List[String])
   {
-    def fields: List[String] = required ::: optional_crossref ::: optional
+    def is_required(s: String): Boolean = required.contains(s)
+    def is_optional(s: String): Boolean =
+      optional_crossref.contains(s) || optional_other.contains(s)
+
+    def fields: List[String] = required ::: optional_crossref ::: optional_other
     def template: String =
       "@" + kind + "{,\n" + fields.map(x => "  " + x + " = {},\n").mkString + "}\n"
   }
@@ -101,6 +107,9 @@
         List(),
         List("author", "title", "howpublished", "month", "year", "note")))
 
+  def get_entry(kind: String): Option[Entry] =
+    entries.find(entry => entry.kind.toLowerCase == kind.toLowerCase)
+
 
 
   /** tokens and chunks **/
@@ -109,42 +118,45 @@
   {
     object Kind extends Enumeration
     {
+      val COMMAND = Value("command")
+      val ENTRY = Value("entry")
       val KEYWORD = Value("keyword")
       val NAT = Value("natural number")
+      val STRING = Value("string")
       val IDENT = Value("identifier")
-      val STRING = Value("string")
-      val SPACE = Value("white space")
+      val IGNORED = Value("ignored")
       val ERROR = Value("bad input")
     }
   }
 
   sealed case class Token(kind: Token.Kind.Value, val source: String)
   {
-    def is_space: Boolean = kind == Token.Kind.SPACE
+    def is_ignored: Boolean = kind == Token.Kind.IGNORED
     def is_error: Boolean = kind == Token.Kind.ERROR
   }
 
   abstract class Chunk
   {
-    def size: Int
     def kind: String
+    def tokens: List[Token]
+    def source: String
   }
 
   case class Ignored(source: String) extends Chunk
   {
-    def size: Int = source.size
     def kind: String = ""
+    val tokens = List(Token(Token.Kind.IGNORED, source))
   }
 
   case class Item(kind: String, tokens: List[Token]) extends Chunk
   {
-    def size: Int = (0 /: tokens)({ case (n, token) => n + token.source.size })
+    val source = tokens.map(_.source).mkString
 
     private val wellformed_content: Option[List[Token]] =
       tokens match {
         case Token(Token.Kind.KEYWORD, "@") :: body
         if !body.isEmpty && !body.exists(_.is_error) =>
-          (body.init.filterNot(_.is_space), body.last) match {
+          (body.init.filterNot(_.is_ignored), body.last) match {
             case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "{") :: toks,
                   Token(Token.Kind.KEYWORD, "}")) => Some(toks)
             case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "(") :: toks,
@@ -187,8 +199,8 @@
 
     override val whiteSpace = "".r
 
-    private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE)
-    private val strict_space = """[ \t]+""".r ^^ token(Token.Kind.SPACE)
+    private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.IGNORED)
+    private val strict_space = """[ \t]+""".r ^^ token(Token.Kind.IGNORED)
 
 
     /* ignored material outside items */
@@ -264,16 +276,27 @@
 
     private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT)
 
-    private val ident =
-      """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r ^^ token(Token.Kind.IDENT)
+    private val identifier =
+      """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r
+
+    private val ident = identifier ^^ token(Token.Kind.IDENT)
 
     val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space))
 
 
     /* items */
 
+    private val special_ident =
+      identifier ^^ { case a =>
+        val kind =
+          if (is_command(a)) Token.Kind.COMMAND
+          else if (get_entry(a).isDefined) Token.Kind.ENTRY
+          else Token.Kind.IDENT
+        Token(kind, a)
+      }
+
     private val item_start: Parser[(String, List[Token])] =
-      at ~ rep(strict_space) ~ ident ~ rep(strict_space) ^^
+      at ~ rep(strict_space) ~ special_ident ~ rep(strict_space) ^^
         { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) }
 
     private val item_body =
@@ -285,7 +308,7 @@
         { case (kind, a) ~ b ~ c ~ d => Item(kind, a ::: List(b) ::: c ::: d.toList) }
 
     private val recover_item: Parser[Item] =
-      at ~ "(?m)[^@]+".r ^^ { case a ~ b => Item("", List(a, Token(Token.Kind.ERROR, b))) }
+      at ~ "(?m)[^@]*".r ^^ { case a ~ b => Item("", List(a, Token(Token.Kind.ERROR, b))) }
 
     def item_line(ctxt: Line_Context): Parser[(Item, Line_Context)] =
     {
@@ -294,7 +317,8 @@
           item_start ~ (left_brace | left_paren) ^^
             { case (kind, a) ~ b =>
                 val right = if (b.source == "{") "}" else ")"
-                (Item(kind, a ::: List(b)), Item_Context(kind, Closed, right)) }
+                (Item(kind, a ::: List(b)), Item_Context(kind, Closed, right)) } |
+          recover_item ^^ { case a => (a, Ignored_Context) }
         case Item_Context(kind, delim, right) =>
           if (delim.depth > 0)
             delimited_line(ctxt)
--- a/src/Tools/jEdit/lib/Tools/jedit	Fri Oct 03 23:33:47 2014 +0200
+++ b/src/Tools/jEdit/lib/Tools/jedit	Sat Oct 04 12:19:26 2014 +0200
@@ -9,6 +9,7 @@
 
 declare -a SOURCES=(
   "src/active.scala"
+  "src/bibtex_token_markup.scala"
   "src/completion_popup.scala"
   "src/context_menu.scala"
   "src/dockable.scala"
@@ -41,9 +42,9 @@
   "src/rendering.scala"
   "src/rich_text_area.scala"
   "src/scala_console.scala"
-  "src/sledgehammer_dockable.scala"
   "src/simplifier_trace_dockable.scala"
   "src/simplifier_trace_window.scala"
+  "src/sledgehammer_dockable.scala"
   "src/spell_checker.scala"
   "src/symbols_dockable.scala"
   "src/syslog_dockable.scala"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Tools/jEdit/src/bibtex_token_markup.scala	Sat Oct 04 12:19:26 2014 +0200
@@ -0,0 +1,94 @@
+/*  Title:      Tools/jEdit/src/bibtex_token_markup.scala
+    Author:     Makarius
+
+Bibtex token markup.
+*/
+
+package isabelle.jedit
+
+
+import isabelle._
+
+import org.gjt.sp.jedit.syntax.{Token => JEditToken, TokenMarker, TokenHandler, ParserRuleSet}
+
+import javax.swing.text.Segment
+
+
+object Bibtex_Token_Markup
+{
+  /* token style */
+
+  private def token_style(item_kind: String, token: Bibtex.Token): Byte =
+    token.kind match {
+      case Bibtex.Token.Kind.COMMAND => JEditToken.KEYWORD2
+      case Bibtex.Token.Kind.ENTRY => JEditToken.KEYWORD1
+      case Bibtex.Token.Kind.KEYWORD => JEditToken.OPERATOR
+      case Bibtex.Token.Kind.NAT => JEditToken.LITERAL2
+      case Bibtex.Token.Kind.STRING => JEditToken.LITERAL1
+      case Bibtex.Token.Kind.IDENT =>
+        if (Bibtex.is_month(token.source)) JEditToken.LITERAL3
+        else
+          Bibtex.get_entry(item_kind) match {
+            case Some(entry) if entry.is_required(token.source) => JEditToken.KEYWORD3
+            case Some(entry) if entry.is_optional(token.source) => JEditToken.KEYWORD4
+            case _ => JEditToken.DIGIT
+          }
+      case Bibtex.Token.Kind.IGNORED => JEditToken.NULL
+      case Bibtex.Token.Kind.ERROR => JEditToken.INVALID
+    }
+
+
+  /* line context */
+
+  private val context_rules = new ParserRuleSet("bibtex", "MAIN")
+
+  private class Line_Context(context: Option[Bibtex.Line_Context])
+    extends Token_Markup.Generic_Line_Context[Bibtex.Line_Context](context_rules, context)
+
+
+  /* token marker */
+
+  class Marker extends TokenMarker
+  {
+    override def markTokens(context: TokenMarker.LineContext,
+        handler: TokenHandler, raw_line: Segment): TokenMarker.LineContext =
+    {
+      val line_ctxt =
+        context match {
+          case c: Line_Context => c.context
+          case _ => Some(Bibtex.Ignored_Context)
+        }
+      val line = if (raw_line == null) new Segment else raw_line
+
+      val context1 =
+      {
+        val (styled_tokens, context1) =
+          line_ctxt match {
+            case Some(ctxt) =>
+              val (chunks, ctxt1) = Bibtex.parse_line(line, ctxt)
+              val styled_tokens =
+                for { chunk <- chunks; tok <- chunk.tokens }
+                yield (token_style(chunk.kind, tok), tok.source)
+              (styled_tokens, new Line_Context(Some(ctxt1)))
+            case None =>
+              val styled_token = (JEditToken.NULL, line.subSequence(0, line.count).toString)
+              (List(styled_token), new Line_Context(None))
+          }
+
+        var offset = 0
+        for ((style, token) <- styled_tokens) {
+          val length = token.length
+          val end_offset = offset + length
+          handler.handleToken(line, style, offset, length, context1)
+          offset += length
+        }
+        handler.handleToken(line, JEditToken.END, line.count, 0, context1)
+        context1
+      }
+      val context2 = context1.intern
+      handler.setLineContext(context2)
+      context2
+    }
+  }
+}
+
--- a/src/Tools/jEdit/src/isabelle.scala	Fri Oct 03 23:33:47 2014 +0200
+++ b/src/Tools/jEdit/src/isabelle.scala	Sat Oct 04 12:19:26 2014 +0200
@@ -16,6 +16,7 @@
 
 import org.gjt.sp.jedit.{jEdit, View, Buffer}
 import org.gjt.sp.jedit.textarea.JEditTextArea
+import org.gjt.sp.jedit.syntax.TokenMarker
 import org.gjt.sp.jedit.gui.{DockableWindowManager, CompleteWord}
 import org.gjt.sp.jedit.options.PluginOptions
 
@@ -65,10 +66,11 @@
 
   /* token markers */
 
-  private val markers =
-    Map(modes.map(name => (name, new Token_Markup.Marker(name))): _*)
+  private val markers: Map[String, TokenMarker] =
+    Map(modes.map(name => (name, new Token_Markup.Marker(name))): _*) +
+      ("bibtex" -> new Bibtex_Token_Markup.Marker)
 
-  def token_marker(name: String): Option[Token_Markup.Marker] = markers.get(name)
+  def token_marker(name: String): Option[TokenMarker] = markers.get(name)
 
 
   /* dockable windows */
--- a/src/Tools/jEdit/src/isabelle_sidekick.scala	Fri Oct 03 23:33:47 2014 +0200
+++ b/src/Tools/jEdit/src/isabelle_sidekick.scala	Sat Oct 04 12:19:26 2014 +0200
@@ -231,7 +231,7 @@
     try {
       var offset = 0
       for (chunk <- Bibtex.parse(JEdit_Lib.buffer_text(buffer))) {
-        val n = chunk.size
+        val n = chunk.source.size
         chunk match {
           case item: Bibtex.Item if item.is_wellformed =>
             val label = if (item.name == "") item.kind else item.kind + " " + item.name
--- a/src/Tools/jEdit/src/token_markup.scala	Fri Oct 03 23:33:47 2014 +0200
+++ b/src/Tools/jEdit/src/token_markup.scala	Sat Oct 04 12:19:26 2014 +0200
@@ -15,8 +15,8 @@
 
 import org.gjt.sp.util.SyntaxUtilities
 import org.gjt.sp.jedit.{jEdit, Mode}
-import org.gjt.sp.jedit.syntax.{Token => JEditToken, TokenMarker, TokenHandler,
-  ParserRuleSet, ModeProvider, XModeHandler, SyntaxStyle}
+import org.gjt.sp.jedit.syntax.{Token => JEditToken, TokenMarker, TokenHandler, ParserRuleSet,
+  ModeProvider, XModeHandler, SyntaxStyle}
 import org.gjt.sp.jedit.textarea.{TextArea, Selection}
 import org.gjt.sp.jedit.buffer.JEditBuffer
 
@@ -173,12 +173,10 @@
   }
 
 
-  /* token marker */
+  /* line context */
 
-  private val isabelle_rules = new ParserRuleSet("isabelle", "MAIN")
-
-  private class Line_Context(val context: Option[Scan.Line_Context])
-    extends TokenMarker.LineContext(isabelle_rules, null)
+  class Generic_Line_Context[C](rules: ParserRuleSet, val context: Option[C])
+    extends TokenMarker.LineContext(rules, null)
   {
     override def hashCode: Int = context.hashCode
     override def equals(that: Any): Boolean =
@@ -188,6 +186,14 @@
       }
   }
 
+  private val context_rules = new ParserRuleSet("isabelle", "MAIN")
+
+  private class Line_Context(context: Option[Scan.Line_Context])
+    extends Generic_Line_Context[Scan.Line_Context](context_rules, context)
+
+
+  /* token marker */
+
   class Marker(mode: String) extends TokenMarker
   {
     override def markTokens(context: TokenMarker.LineContext,
@@ -202,11 +208,19 @@
 
       val context1 =
       {
+        def no_context =
+        {
+          val styled_token = (JEditToken.NULL, line.subSequence(0, line.count).toString)
+          (List(styled_token), new Line_Context(None))
+        }
         val (styled_tokens, context1) =
           if (mode == "isabelle-ml" || mode == "sml") {
-            val (tokens, ctxt1) = ML_Lex.tokenize_line(mode == "sml", line, line_ctxt.get)
-            val styled_tokens = tokens.map(tok => (Rendering.ml_token_markup(tok), tok.source))
-            (styled_tokens, new Line_Context(Some(ctxt1)))
+            if (line_ctxt.isDefined) {
+              val (tokens, ctxt1) = ML_Lex.tokenize_line(mode == "sml", line, line_ctxt.get)
+              val styled_tokens = tokens.map(tok => (Rendering.ml_token_markup(tok), tok.source))
+              (styled_tokens, new Line_Context(Some(ctxt1)))
+            }
+            else no_context
           }
           else {
             Isabelle.mode_syntax(mode) match {
@@ -215,9 +229,7 @@
                 val styled_tokens =
                   tokens.map(tok => (Rendering.token_markup(syntax, tok), tok.source))
                 (styled_tokens, new Line_Context(Some(ctxt1)))
-              case _ =>
-                val styled_token = (JEditToken.NULL, line.subSequence(0, line.count).toString)
-                (List(styled_token), new Line_Context(None))
+              case _ => no_context
             }
           }