parse citations from raw source, without formal context;
authorwenzelm
Thu, 19 Jan 2023 11:23:44 +0100
changeset 77011 3e48f8c6afc9
parent 77010 fead2b33acdc
child 77012 2ac1b7f4f3e4
parse citations from raw source, without formal context;
src/Pure/General/antiquote.scala
src/Pure/Thy/bibtex.scala
src/Pure/Thy/thy_header.scala
--- a/src/Pure/General/antiquote.scala	Wed Jan 18 16:49:01 2023 +0100
+++ b/src/Pure/General/antiquote.scala	Thu Jan 19 11:23:44 2023 +0100
@@ -38,6 +38,11 @@
 
     val antiquote: Parser[Antiquote] =
       txt ^^ (x => Text(x)) | (control ^^ (x => Control(x)) | antiq ^^ (x => Antiq(x)))
+
+    def antiquote_special(special: Symbol.Symbol => Boolean): Parser[Antiquote] =
+      many1(s => !special(s)) ^^ Text.apply |
+      one(special) ~ opt(cartouche) ^^
+        { case x ~ Some(y) => Control(x + y) case x ~ None => Control(x) }
   }
 
 
--- a/src/Pure/Thy/bibtex.scala	Wed Jan 18 16:49:01 2023 +0100
+++ b/src/Pure/Thy/bibtex.scala	Thu Jan 19 11:23:44 2023 +0100
@@ -14,6 +14,8 @@
 import scala.util.parsing.input.Reader
 import scala.util.matching.Regex
 
+import isabelle.{Token => Isar_Token}
+
 
 object Bibtex {
   /** file format **/
@@ -734,6 +736,7 @@
   private val Cite_Command = """\\(cite|nocite|citet|citep)((?:\[[^\]]*\])?)\{([^}]*)\}""".r
   private val Cite_Macro = """\[\s*cite_macro\s*=\s*"?(\w+)"?\]\s*""".r
   private val CITE = "cite"
+  private val NOCITE = "nocite"
 
   def update_cite_commands(str: String): String =
     Cite_Command.replaceAllIn(str, { m =>
@@ -766,4 +769,75 @@
         else cite_antiquotation(CITE, body2 + " using " + quote(name))
     }
   }
+
+  object Cite_Parsers extends Parse.Parsers {
+    val keywords: Keyword.Keywords = Thy_Header.bootstrap_keywords + "in" + "and" + "using"
+
+    val location: Parser[String] = embedded ~ $$$("in") ^^ { case x ~ _ => x } | success("")
+    val citations: Parser[String] = rep1sep(name, $$$("and")) ^^ (x => x.mkString(","))
+    val kind: Parser[String] = $$$("using") ~! name ^^ { case _ ~ x => x } | success(CITE)
+
+    val nocite: Parser[Latex.Text] =
+      location ~ citations ~ kind ^^ { case _ ~ x ~ _ =>
+        List(XML.elem(Markup.Latex_Cite(Markup.Kind(NOCITE) ::: Markup.Citations(x))))
+      }
+  }
+
+  sealed case class Cite(kind: String, body: String, pos: Isar_Token.Pos) {
+    def position: Position.T = pos.position()
+
+    def nocite_latex: Latex.Text = {
+      val Parsers = Cite_Parsers
+      val tokens = Isar_Token.explode(Parsers.keywords, body)
+      Parsers.parse_all(Parsers.nocite, Isar_Token.reader(tokens, pos)) match {
+        case Parsers.Success(nocite, _) => List(XML.Elem(Markup.Latex_Output(position), nocite))
+        case _ => Nil
+      }
+    }
+
+    def errors: List[String] =
+      if (nocite_latex.nonEmpty) Nil
+      else List("Malformed citation" + Position.here(position))
+  }
+
+  def parse_citations(
+    cite_commands: List[String],
+    text: String,
+    start: Isar_Token.Pos = Isar_Token.Pos.start
+  ): List[Cite] = {
+    val controls = cite_commands.map(s => Symbol.control_prefix + s + Symbol.control_suffix)
+    val special = (controls ::: controls.map(Symbol.decode)).toSet
+
+    val Parsers = Antiquote.Parsers
+    Parsers.parseAll(Parsers.rep(Parsers.antiquote_special(special)), text) match {
+      case Parsers.Success(ants, _) =>
+        var pos = start
+        val result = new mutable.ListBuffer[Cite]
+        for (ant <- ants) {
+          ant match {
+            case Antiquote.Control(source) =>
+              for {
+                head <- Symbol.iterator(source).nextOption
+                kind <- Symbol.control_name(Symbol.encode(head))
+              } {
+                val rest = source.substring(head.length)
+                val (body, pos1) =
+                  if (rest.isEmpty) (rest, pos)
+                  else (Scan.Parsers.cartouche_content(rest), pos.advance(Symbol.open))
+                result += Cite(kind, body, pos1)
+              }
+            case _ =>
+          }
+          pos = pos.advance(ant.source)
+        }
+        result.toList
+      case _ => error("Unexpected failure parsing special antiquotations:\n" + text)
+    }
+  }
+
+  def parse_citations_latex(
+    cite_commands: List[String],
+    text: String,
+    start: Isar_Token.Pos = Isar_Token.Pos.start
+  ): Latex.Text = parse_citations(cite_commands, text, start = start).flatMap(_.nocite_latex)
 }
--- a/src/Pure/Thy/thy_header.scala	Wed Jan 18 16:49:01 2023 +0100
+++ b/src/Pure/Thy/thy_header.scala	Thu Jan 19 11:23:44 2023 +0100
@@ -59,7 +59,7 @@
       (THEORY, Keyword.Spec(kind = Keyword.THY_BEGIN, tags = List("theory"))),
       ("ML", Keyword.Spec(kind = Keyword.THY_DECL, tags = List("ML"))))
 
-  private val bootstrap_keywords =
+  val bootstrap_keywords: Keyword.Keywords =
     Keyword.Keywords.empty.add_keywords(bootstrap_header)
 
   val bootstrap_syntax: Outer_Syntax =