src/Pure/Thy/bibtex.scala
changeset 75393 87ebf5a50283
parent 73730 2f023b2b0e1e
child 75394 42267c650205
equal deleted inserted replaced
75388:b3ca4a6ed74b 75393:87ebf5a50283
    12 import scala.collection.mutable
    12 import scala.collection.mutable
    13 import scala.util.parsing.combinator.RegexParsers
    13 import scala.util.parsing.combinator.RegexParsers
    14 import scala.util.parsing.input.Reader
    14 import scala.util.parsing.input.Reader
    15 
    15 
    16 
    16 
    17 object Bibtex
    17 object Bibtex {
    18 {
       
    19   /** file format **/
    18   /** file format **/
    20 
    19 
    21   def is_bibtex(name: String): Boolean = name.endsWith(".bib")
    20   def is_bibtex(name: String): Boolean = name.endsWith(".bib")
    22 
    21 
    23   class File_Format extends isabelle.File_Format
    22   class File_Format extends isabelle.File_Format {
    24   {
       
    25     val format_name: String = "bibtex"
    23     val format_name: String = "bibtex"
    26     val file_ext: String = "bib"
    24     val file_ext: String = "bib"
    27 
    25 
    28     override def theory_suffix: String = "bibtex_file"
    26     override def theory_suffix: String = "bibtex_file"
    29     override def theory_content(name: String): String =
    27     override def theory_content(name: String): String =
    30       """theory "bib" imports Pure begin bibtex_file """ +
    28       """theory "bib" imports Pure begin bibtex_file """ +
    31         Outer_Syntax.quote_string(name) + """ end"""
    29         Outer_Syntax.quote_string(name) + """ end"""
    32 
    30 
    33     override def html_document(snapshot: Document.Snapshot): Option[Presentation.HTML_Document] =
    31     override def html_document(snapshot: Document.Snapshot): Option[Presentation.HTML_Document] = {
    34     {
       
    35       val name = snapshot.node_name
    32       val name = snapshot.node_name
    36       if (detect(name.node)) {
    33       if (detect(name.node)) {
    37         val title = "Bibliography " + quote(snapshot.node_name.path.file_name)
    34         val title = "Bibliography " + quote(snapshot.node_name.path.file_name)
    38         val content =
    35         val content =
    39           Isabelle_System.with_tmp_file("bib", "bib") { bib =>
    36           Isabelle_System.with_tmp_file("bib", "bib") { bib =>
    48 
    45 
    49 
    46 
    50 
    47 
    51   /** bibtex errors **/
    48   /** bibtex errors **/
    52 
    49 
    53   def bibtex_errors(dir: Path, root_name: String): List[String] =
    50   def bibtex_errors(dir: Path, root_name: String): List[String] = {
    54   {
       
    55     val log_path = dir + Path.explode(root_name).ext("blg")
    51     val log_path = dir + Path.explode(root_name).ext("blg")
    56     if (log_path.is_file) {
    52     if (log_path.is_file) {
    57       val Error1 = """^(I couldn't open database file .+)$""".r
    53       val Error1 = """^(I couldn't open database file .+)$""".r
    58       val Error2 = """^(I found no .+)$""".r
    54       val Error2 = """^(I found no .+)$""".r
    59       val Error3 = """^(.+)---line (\d+) of file (.+)""".r
    55       val Error3 = """^(.+)---line (\d+) of file (.+)""".r
    76 
    72 
    77 
    73 
    78 
    74 
    79   /** check database **/
    75   /** check database **/
    80 
    76 
    81   def check_database(database: String): (List[(String, Position.T)], List[(String, Position.T)]) =
    77   def check_database(database: String): (List[(String, Position.T)], List[(String, Position.T)]) = {
    82   {
       
    83     val chunks = parse(Line.normalize(database))
    78     val chunks = parse(Line.normalize(database))
    84     var chunk_pos = Map.empty[String, Position.T]
    79     var chunk_pos = Map.empty[String, Position.T]
    85     val tokens = new mutable.ListBuffer[(Token, Position.T)]
    80     val tokens = new mutable.ListBuffer[(Token, Position.T)]
    86     var line = 1
    81     var line = 1
    87     var offset = 1
    82     var offset = 1
    88 
    83 
    89     def make_pos(length: Int): Position.T =
    84     def make_pos(length: Int): Position.T =
    90       Position.Offset(offset) ::: Position.End_Offset(offset + length) ::: Position.Line(line)
    85       Position.Offset(offset) ::: Position.End_Offset(offset + length) ::: Position.Line(line)
    91 
    86 
    92     def advance_pos(tok: Token): Unit =
    87     def advance_pos(tok: Token): Unit = {
    93     {
       
    94       for (s <- Symbol.iterator(tok.source)) {
    88       for (s <- Symbol.iterator(tok.source)) {
    95         if (Symbol.is_newline(s)) line += 1
    89         if (Symbol.is_newline(s)) line += 1
    96         offset += 1
    90         offset += 1
    97       }
    91       }
    98     }
    92     }
   109         tokens += (tok.copy(source = tok.source.replace("\n", " ")) -> make_pos(tok.source.length))
   103         tokens += (tok.copy(source = tok.source.replace("\n", " ")) -> make_pos(tok.source.length))
   110         advance_pos(tok)
   104         advance_pos(tok)
   111       }
   105       }
   112     }
   106     }
   113 
   107 
   114     Isabelle_System.with_tmp_dir("bibtex")(tmp_dir =>
   108     Isabelle_System.with_tmp_dir("bibtex")(tmp_dir => {
   115     {
       
   116       File.write(tmp_dir + Path.explode("root.bib"),
   109       File.write(tmp_dir + Path.explode("root.bib"),
   117         tokens.iterator.map(p => p._1.source).mkString("", "\n", "\n"))
   110         tokens.iterator.map(p => p._1.source).mkString("", "\n", "\n"))
   118       File.write(tmp_dir + Path.explode("root.aux"),
   111       File.write(tmp_dir + Path.explode("root.aux"),
   119         "\\bibstyle{plain}\n\\bibdata{root}\n\\citation{*}")
   112         "\\bibstyle{plain}\n\\bibdata{root}\n\\citation{*}")
   120       Isabelle_System.bash("\"$ISABELLE_BIBTEX\" root", cwd = tmp_dir.file)
   113       Isabelle_System.bash("\"$ISABELLE_BIBTEX\" root", cwd = tmp_dir.file)
   146         }
   139         }
   147       (errors.map(_._2), warnings.map(_._2))
   140       (errors.map(_._2), warnings.map(_._2))
   148     })
   141     })
   149   }
   142   }
   150 
   143 
   151   object Check_Database extends Scala.Fun_String("bibtex_check_database")
   144   object Check_Database extends Scala.Fun_String("bibtex_check_database") {
   152   {
       
   153     val here = Scala_Project.here
   145     val here = Scala_Project.here
   154     def apply(database: String): String =
   146     def apply(database: String): String = {
   155     {
       
   156       import XML.Encode._
   147       import XML.Encode._
   157       YXML.string_of_body(pair(list(pair(string, properties)), list(pair(string, properties)))(
   148       YXML.string_of_body(pair(list(pair(string, properties)), list(pair(string, properties)))(
   158         check_database(database)))
   149         check_database(database)))
   159     }
   150     }
   160   }
   151   }
   163 
   154 
   164   /** document model **/
   155   /** document model **/
   165 
   156 
   166   /* entries */
   157   /* entries */
   167 
   158 
   168   def entries(text: String): List[Text.Info[String]] =
   159   def entries(text: String): List[Text.Info[String]] = {
   169   {
       
   170     val result = new mutable.ListBuffer[Text.Info[String]]
   160     val result = new mutable.ListBuffer[Text.Info[String]]
   171     var offset = 0
   161     var offset = 0
   172     for (chunk <- Bibtex.parse(text)) {
   162     for (chunk <- Bibtex.parse(text)) {
   173       val end_offset = offset + chunk.source.length
   163       val end_offset = offset + chunk.source.length
   174       if (chunk.name != "" && !chunk.is_command)
   164       if (chunk.name != "" && !chunk.is_command)
   176       offset = end_offset
   166       offset = end_offset
   177     }
   167     }
   178     result.toList
   168     result.toList
   179   }
   169   }
   180 
   170 
   181   def entries_iterator[A, B <: Document.Model](models: Map[A, B])
   171   def entries_iterator[A, B <: Document.Model](
   182     : Iterator[Text.Info[(String, B)]] =
   172     models: Map[A, B]
   183   {
   173   ): Iterator[Text.Info[(String, B)]] = {
   184     for {
   174     for {
   185       (_, model) <- models.iterator
   175       (_, model) <- models.iterator
   186       info <- model.bibtex_entries.iterator
   176       info <- model.bibtex_entries.iterator
   187     } yield info.map((_, model))
   177     } yield info.map((_, model))
   188   }
   178   }
   189 
   179 
   190 
   180 
   191   /* completion */
   181   /* completion */
   192 
   182 
   193   def completion[A, B <: Document.Model](
   183   def completion[A, B <: Document.Model](
   194     history: Completion.History, rendering: Rendering, caret: Text.Offset,
   184     history: Completion.History,
   195     models: Map[A, B]): Option[Completion.Result] =
   185     rendering: Rendering,
   196   {
   186     caret: Text.Offset,
       
   187     models: Map[A, B]
       
   188   ): Option[Completion.Result] = {
   197     for {
   189     for {
   198       Text.Info(r, name) <- rendering.citations(rendering.before_caret_range(caret)).headOption
   190       Text.Info(r, name) <- rendering.citations(rendering.before_caret_range(caret)).headOption
   199       name1 <- Completion.clean_name(name)
   191       name1 <- Completion.clean_name(name)
   200 
   192 
   201       original <- rendering.get_text(r)
   193       original <- rendering.get_text(r)
   243 
   235 
   244   sealed case class Entry(
   236   sealed case class Entry(
   245     kind: String,
   237     kind: String,
   246     required: List[String],
   238     required: List[String],
   247     optional_crossref: List[String],
   239     optional_crossref: List[String],
   248     optional_other: List[String])
   240     optional_other: List[String]
   249   {
   241   ) {
   250     val optional_standard: List[String] = List("url", "doi", "ee")
   242     val optional_standard: List[String] = List("url", "doi", "ee")
   251 
   243 
   252     def is_required(s: String): Boolean = required.contains(s.toLowerCase)
   244     def is_required(s: String): Boolean = required.contains(s.toLowerCase)
   253     def is_optional(s: String): Boolean =
   245     def is_optional(s: String): Boolean =
   254       optional_crossref.contains(s.toLowerCase) ||
   246       optional_crossref.contains(s.toLowerCase) ||
   327 
   319 
   328 
   320 
   329 
   321 
   330   /** tokens and chunks **/
   322   /** tokens and chunks **/
   331 
   323 
   332   object Token
   324   object Token {
   333   {
   325     object Kind extends Enumeration {
   334     object Kind extends Enumeration
       
   335     {
       
   336       val COMMAND = Value("command")
   326       val COMMAND = Value("command")
   337       val ENTRY = Value("entry")
   327       val ENTRY = Value("entry")
   338       val KEYWORD = Value("keyword")
   328       val KEYWORD = Value("keyword")
   339       val NAT = Value("natural number")
   329       val NAT = Value("natural number")
   340       val STRING = Value("string")
   330       val STRING = Value("string")
   344       val COMMENT = Value("ignored text")
   334       val COMMENT = Value("ignored text")
   345       val ERROR = Value("bad input")
   335       val ERROR = Value("bad input")
   346     }
   336     }
   347   }
   337   }
   348 
   338 
   349   sealed case class Token(kind: Token.Kind.Value, source: String)
   339   sealed case class Token(kind: Token.Kind.Value, source: String) {
   350   {
       
   351     def is_kind: Boolean =
   340     def is_kind: Boolean =
   352       kind == Token.Kind.COMMAND ||
   341       kind == Token.Kind.COMMAND ||
   353       kind == Token.Kind.ENTRY ||
   342       kind == Token.Kind.ENTRY ||
   354       kind == Token.Kind.IDENT
   343       kind == Token.Kind.IDENT
   355     def is_name: Boolean =
   344     def is_name: Boolean =
   362       kind == Token.Kind.ERROR
   351       kind == Token.Kind.ERROR
   363     def is_open: Boolean =
   352     def is_open: Boolean =
   364       kind == Token.Kind.KEYWORD && (source == "{" || source == "(")
   353       kind == Token.Kind.KEYWORD && (source == "{" || source == "(")
   365   }
   354   }
   366 
   355 
   367   case class Chunk(kind: String, tokens: List[Token])
   356   case class Chunk(kind: String, tokens: List[Token]) {
   368   {
       
   369     val source = tokens.map(_.source).mkString
   357     val source = tokens.map(_.source).mkString
   370 
   358 
   371     private val content: Option[List[Token]] =
   359     private val content: Option[List[Token]] =
   372       tokens match {
   360       tokens match {
   373         case Token(Token.Kind.KEYWORD, "@") :: body if body.nonEmpty =>
   361         case Token(Token.Kind.KEYWORD, "@") :: body if body.nonEmpty =>
   421 
   409 
   422 
   410 
   423   // See also https://ctan.org/tex-archive/biblio/bibtex/base/bibtex.web
   411   // See also https://ctan.org/tex-archive/biblio/bibtex/base/bibtex.web
   424   // module @<Scan for and process a \.{.bib} command or database entry@>.
   412   // module @<Scan for and process a \.{.bib} command or database entry@>.
   425 
   413 
   426   object Parsers extends RegexParsers
   414   object Parsers extends RegexParsers {
   427   {
       
   428     /* white space and comments */
   415     /* white space and comments */
   429 
   416 
   430     override val whiteSpace = "".r
   417     override val whiteSpace = "".r
   431 
   418 
   432     private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE)
   419     private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE)
   445 
   432 
   446     /* delimited string: outermost "..." or {...} and body with balanced {...} */
   433     /* delimited string: outermost "..." or {...} and body with balanced {...} */
   447 
   434 
   448     // see also bibtex.web: scan_a_field_token_and_eat_white, scan_balanced_braces
   435     // see also bibtex.web: scan_a_field_token_and_eat_white, scan_balanced_braces
   449     private def delimited_depth(delim: Delimited): Parser[(String, Delimited)] =
   436     private def delimited_depth(delim: Delimited): Parser[(String, Delimited)] =
   450       new Parser[(String, Delimited)]
   437       new Parser[(String, Delimited)] {
   451       {
       
   452         require(if (delim.quoted) delim.depth > 0 else delim.depth >= 0,
   438         require(if (delim.quoted) delim.depth > 0 else delim.depth >= 0,
   453           "bad delimiter depth")
   439           "bad delimiter depth")
   454 
   440 
   455         def apply(in: Input) =
   441         def apply(in: Input) = {
   456         {
       
   457           val start = in.offset
   442           val start = in.offset
   458           val end = in.source.length
   443           val end = in.source.length
   459 
   444 
   460           var i = start
   445           var i = start
   461           var q = delim.quoted
   446           var q = delim.quoted
   565 
   550 
   566     /* chunks */
   551     /* chunks */
   567 
   552 
   568     val chunk: Parser[Chunk] = ignored | (item | recover_item)
   553     val chunk: Parser[Chunk] = ignored | (item | recover_item)
   569 
   554 
   570     def chunk_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] =
   555     def chunk_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] = {
   571     {
       
   572       ctxt match {
   556       ctxt match {
   573         case Ignored =>
   557         case Ignored =>
   574           ignored_line |
   558           ignored_line |
   575           at ^^ { case a => (Chunk("", List(a)), At) }
   559           at ^^ { case a => (Chunk("", List(a)), At) }
   576 
   560 
   608     Parsers.parseAll(Parsers.rep(Parsers.chunk), Scan.char_reader(input)) match {
   592     Parsers.parseAll(Parsers.rep(Parsers.chunk), Scan.char_reader(input)) match {
   609       case Parsers.Success(result, _) => result
   593       case Parsers.Success(result, _) => result
   610       case _ => error("Unexpected failure to parse input:\n" + input.toString)
   594       case _ => error("Unexpected failure to parse input:\n" + input.toString)
   611     }
   595     }
   612 
   596 
   613   def parse_line(input: CharSequence, context: Line_Context): (List[Chunk], Line_Context) =
   597   def parse_line(input: CharSequence, context: Line_Context): (List[Chunk], Line_Context) = {
   614   {
       
   615     var in: Reader[Char] = Scan.char_reader(input)
   598     var in: Reader[Char] = Scan.char_reader(input)
   616     val chunks = new mutable.ListBuffer[Chunk]
   599     val chunks = new mutable.ListBuffer[Chunk]
   617     var ctxt = context
   600     var ctxt = context
   618     while (!in.atEnd) {
   601     while (!in.atEnd) {
   619       Parsers.parse(Parsers.chunk_line(ctxt), in) match {
   602       Parsers.parse(Parsers.chunk_line(ctxt), in) match {
   642   def html_output(bib: List[Path],
   625   def html_output(bib: List[Path],
   643     title: String = "Bibliography",
   626     title: String = "Bibliography",
   644     body: Boolean = false,
   627     body: Boolean = false,
   645     citations: List[String] = List("*"),
   628     citations: List[String] = List("*"),
   646     style: String = "",
   629     style: String = "",
   647     chronological: Boolean = false): String =
   630     chronological: Boolean = false
   648   {
   631   ): String = {
   649     Isabelle_System.with_tmp_dir("bibtex")(tmp_dir =>
   632     Isabelle_System.with_tmp_dir("bibtex")(tmp_dir => {
   650     {
       
   651       /* database files */
   633       /* database files */
   652 
   634 
   653       val bib_files = bib.map(_.drop_ext)
   635       val bib_files = bib.map(_.drop_ext)
   654       val bib_names =
   636       val bib_names = {
   655       {
       
   656         val names0 = bib_files.map(_.file_name)
   637         val names0 = bib_files.map(_.file_name)
   657         if (Library.duplicates(names0).isEmpty) names0
   638         if (Library.duplicates(names0).isEmpty) names0
   658         else names0.zipWithIndex.map({ case (name, i) => (i + 1).toString + "-" + name })
   639         else names0.zipWithIndex.map({ case (name, i) => (i + 1).toString + "-" + name })
   659       }
   640       }
   660 
   641