src/Pure/Thy/bibtex.scala
author wenzelm
Sun, 15 Jan 2023 16:28:03 +0100
changeset 76986 1e31ddcab458
parent 76984 29432d4a376d
child 77007 19a7046f90f9
permissions -rw-r--r--
clarified treatment of cite macro name;

/*  Title:      Pure/Thy/bibtex.scala
    Author:     Makarius

BibTeX support.
*/

package isabelle


import java.io.{File => JFile}

import scala.collection.mutable
import scala.util.parsing.combinator.RegexParsers
import scala.util.parsing.input.Reader
import scala.util.matching.Regex


object Bibtex {
  /** file format **/

  class File_Format extends isabelle.File_Format {
    val format_name: String = "bibtex"
    val file_ext: String = "bib"

    override def parse_data(name: String, text: String): Bibtex.Entries =
      Bibtex.Entries.parse(text, file_pos = name)

    override def theory_suffix: String = "bibtex_file"
    override def theory_content(name: String): String =
      """theory "bib" imports Pure begin bibtex_file "." end"""
    override def theory_excluded(name: String): Boolean = name == "bib"

    override def html_document(snapshot: Document.Snapshot): Option[Browser_Info.HTML_Document] = {
      val name = snapshot.node_name
      if (detect(name.node)) {
        val title = "Bibliography " + quote(snapshot.node_name.file_name)
        val content =
          Isabelle_System.with_tmp_file("bib", "bib") { bib =>
            File.write(bib, snapshot.node.source)
            Bibtex.html_output(List(bib), style = "unsort", title = title)
          }
        Some(Browser_Info.HTML_Document(title, content))
      }
      else None
    }
  }



  /** bibtex errors **/

  def bibtex_errors(dir: Path, root_name: String): List[String] = {
    val log_path = dir + Path.explode(root_name).ext("blg")
    if (log_path.is_file) {
      val Error1 = """^(I couldn't open database file .+)$""".r
      val Error2 = """^(I found no .+)$""".r
      val Error3 = """^(.+)---line (\d+) of file (.+)""".r
      Line.logical_lines(File.read(log_path)).flatMap(line =>
        line match {
          case Error1(msg) => Some("Bibtex error: " + msg)
          case Error2(msg) => Some("Bibtex error: " + msg)
          case Error3(msg, Value.Int(l), file) =>
            val path = File.standard_path(file)
            if (Path.is_wellformed(path)) {
              val pos = Position.Line_File(l, (dir + Path.explode(path)).canonical.implode)
              Some("Bibtex error" + Position.here(pos) + ":\n  " + msg)
            }
            else None
          case _ => None
        })
    }
    else Nil
  }



  /** check database **/

  def check_database(database: String): (List[(String, Position.T)], List[(String, Position.T)]) = {
    val chunks = parse(Line.normalize(database))
    var chunk_pos = Map.empty[String, Position.T]
    val tokens = new mutable.ListBuffer[(Token, Position.T)]
    var line = 1
    var offset = 1

    def make_pos(length: Int): Position.T =
      Position.Offset(offset) ::: Position.End_Offset(offset + length) ::: Position.Line(line)

    def advance_pos(tok: Token): Unit = {
      for (s <- Symbol.iterator(tok.source)) {
        if (Symbol.is_newline(s)) line += 1
        offset += 1
      }
    }

    def get_line_pos(l: Int): Position.T =
      if (0 < l && l <= tokens.length) tokens(l - 1)._2 else Position.none

    for (chunk <- chunks) {
      val name = chunk.name
      if (name != "" && !chunk_pos.isDefinedAt(name)) {
        chunk_pos += (name -> make_pos(chunk.heading_length))
      }
      for (tok <- chunk.tokens) {
        tokens += (tok.copy(source = tok.source.replace("\n", " ")) -> make_pos(tok.source.length))
        advance_pos(tok)
      }
    }

    Isabelle_System.with_tmp_dir("bibtex") { tmp_dir =>
      File.write(tmp_dir + Path.explode("root.bib"),
        tokens.iterator.map(p => p._1.source).mkString("", "\n", "\n"))
      File.write(tmp_dir + Path.explode("root.aux"),
        "\\bibstyle{plain}\n\\bibdata{root}\n\\citation{*}")
      Isabelle_System.bash("\"$ISABELLE_BIBTEX\" root", cwd = tmp_dir.file)

      val Error = """^(.*)---line (\d+) of file root.bib$""".r
      val Warning = """^Warning--(.+)$""".r
      val Warning_Line = """--line (\d+) of file root.bib$""".r
      val Warning_in_Chunk = """^Warning--(.+) in (.+)$""".r

      val log_file = tmp_dir + Path.explode("root.blg")
      val lines = if (log_file.is_file) Line.logical_lines(File.read(log_file)) else Nil

      val (errors, warnings) =
        if (lines.isEmpty) (Nil, Nil)
        else {
          lines.zip(lines.tail ::: List("")).flatMap(
            {
              case (Error(msg, Value.Int(l)), _) =>
                Some((true, (msg, get_line_pos(l))))
              case (Warning_in_Chunk(msg, name), _) if chunk_pos.isDefinedAt(name) =>
                Some((false, (Word.capitalize(msg + " in entry " + quote(name)), chunk_pos(name))))
              case (Warning(msg), Warning_Line(Value.Int(l))) =>
                Some((false, (Word.capitalize(msg), get_line_pos(l))))
              case (Warning(msg), _) =>
                Some((false, (Word.capitalize(msg), Position.none)))
              case _ => None
            }
          ).partition(_._1)
        }
      (errors.map(_._2), warnings.map(_._2))
    }
  }

  object Check_Database extends Scala.Fun_String("bibtex_check_database") {
    val here = Scala_Project.here
    def apply(database: String): String = {
      import XML.Encode._
      YXML.string_of_body(pair(list(pair(string, properties)), list(pair(string, properties)))(
        check_database(database)))
    }
  }



  /** document model **/

  /* entries */

  object Entries {
    val empty: Entries = apply(Nil)

    def apply(entries: List[Text.Info[String]], errors: List[String] = Nil): Entries =
      new Entries(entries, errors)

    def parse(text: String, file_pos: String = ""): Entries = {
      val entries = new mutable.ListBuffer[Text.Info[String]]
      var offset = 0
      var line = 1
      var err_line = 0

      try {
        for (chunk <- Bibtex.parse(text)) {
          val end_offset = offset + chunk.source.length
          if (chunk.name != "" && !chunk.is_command) {
            entries += Text.Info(Text.Range(offset, end_offset), chunk.name)
          }
          if (chunk.is_malformed && err_line == 0) { err_line = line }
          offset = end_offset
          line += chunk.source.count(_ == '\n')
        }

        val err_pos =
          if (err_line == 0 || file_pos.isEmpty) Position.none
          else Position.Line_File(err_line, file_pos)
        val errors =
          if (err_line == 0) Nil
          else List("Malformed bibtex file" + Position.here(err_pos))

        apply(entries.toList, errors = errors)
      }
      catch { case ERROR(msg) => apply(Nil, errors = List(msg)) }
    }

    def iterator[A <: Document.Model](models: Iterable[A]): Iterator[Text.Info[(String, A)]] =
      for {
        model <- models.iterator
        bibtex_entries <- model.get_data(classOf[Entries]).iterator
        info <- bibtex_entries.entries.iterator
      } yield info.map((_, model))
  }

  final class Entries private(val entries: List[Text.Info[String]], val errors: List[String]) {
    override def toString: String = "Bibtex.Entries(" + entries.length + ")"

    def ::: (other: Entries): Entries =
      new Entries(entries ::: other.entries, errors ::: other.errors)
  }


  /* completion */

  def completion[A <: Document.Model](
    history: Completion.History,
    rendering: Rendering,
    caret: Text.Offset,
    models: Iterable[A]
  ): Option[Completion.Result] = {
    for {
      Text.Info(r, name) <- rendering.citations(rendering.before_caret_range(caret)).headOption
      name1 <- Completion.clean_name(name)

      original <- rendering.get_text(r)
      original1 <- Completion.clean_name(Library.perhaps_unquote(original))

      entries =
        (for {
          Text.Info(_, (entry, _)) <- Entries.iterator(models)
          if entry.toLowerCase.containsSlice(name1.toLowerCase) && entry != original1
        } yield entry).toList
      if entries.nonEmpty

      items =
        entries.sorted.map({
          case entry =>
            val full_name = Long_Name.qualify(Markup.CITATION, entry)
            val description = List(entry, "(BibTeX entry)")
            val replacement = quote(entry)
            Completion.Item(r, original, full_name, description, replacement, 0, false)
        }).sorted(history.ordering).take(rendering.options.int("completion_limit"))
    } yield Completion.Result(r, original, false, items)
  }



  /** content **/

  private val months = List(
    "jan",
    "feb",
    "mar",
    "apr",
    "may",
    "jun",
    "jul",
    "aug",
    "sep",
    "oct",
    "nov",
    "dec")
  def is_month(s: String): Boolean = months.contains(s.toLowerCase)

  private val commands = List("preamble", "string")
  def is_command(s: String): Boolean = commands.contains(s.toLowerCase)

  sealed case class Entry(
    kind: String,
    required: List[String],
    optional_crossref: List[String],
    optional_other: List[String]
  ) {
    val optional_standard: List[String] = List("url", "doi", "ee")

    def is_required(s: String): Boolean = required.contains(s.toLowerCase)
    def is_optional(s: String): Boolean =
      optional_crossref.contains(s.toLowerCase) ||
      optional_other.contains(s.toLowerCase) ||
      optional_standard.contains(s.toLowerCase)

    def fields: List[String] =
      required ::: optional_crossref ::: optional_other ::: optional_standard

    def template: String =
      "@" + kind + "{,\n" + fields.map(x => "  " + x + " = {},\n").mkString + "}\n"
  }

  val known_entries: List[Entry] =
    List(
      Entry("Article",
        List("author", "title"),
        List("journal", "year"),
        List("volume", "number", "pages", "month", "note")),
      Entry("InProceedings",
        List("author", "title"),
        List("booktitle", "year"),
        List("editor", "volume", "number", "series", "pages", "month", "address",
          "organization", "publisher", "note")),
      Entry("InCollection",
        List("author", "title", "booktitle"),
        List("publisher", "year"),
        List("editor", "volume", "number", "series", "type", "chapter", "pages",
          "edition", "month", "address", "note")),
      Entry("InBook",
        List("author", "editor", "title", "chapter"),
        List("publisher", "year"),
        List("volume", "number", "series", "type", "address", "edition", "month", "pages", "note")),
      Entry("Proceedings",
        List("title", "year"),
        List(),
        List("booktitle", "editor", "volume", "number", "series", "address", "month",
          "organization", "publisher", "note")),
      Entry("Book",
        List("author", "editor", "title"),
        List("publisher", "year"),
        List("volume", "number", "series", "address", "edition", "month", "note")),
      Entry("Booklet",
        List("title"),
        List(),
        List("author", "howpublished", "address", "month", "year", "note")),
      Entry("PhdThesis",
        List("author", "title", "school", "year"),
        List(),
        List("type", "address", "month", "note")),
      Entry("MastersThesis",
        List("author", "title", "school", "year"),
        List(),
        List("type", "address", "month", "note")),
      Entry("TechReport",
        List("author", "title", "institution", "year"),
        List(),
        List("type", "number", "address", "month", "note")),
      Entry("Manual",
        List("title"),
        List(),
        List("author", "organization", "address", "edition", "month", "year", "note")),
      Entry("Unpublished",
        List("author", "title", "note"),
        List(),
        List("month", "year")),