# HG changeset patch
# User wenzelm
# Date 1514117225 -3600
# Node ID 4588f714a78a8a5ea8321cdf110de2f8eb257a0d
# Parent  c573cfb2c407d44a9b92bf7d1ec7a9dfc82a2708
clarified directories;

diff -r c573cfb2c407 -r 4588f714a78a src/Pure/ROOT.ML
--- a/src/Pure/ROOT.ML	Sun Dec 24 12:48:43 2017 +0100
+++ b/src/Pure/ROOT.ML	Sun Dec 24 13:07:05 2017 +0100
@@ -283,6 +283,7 @@
 ML_file "ML/ml_antiquotations.ML";
 ML_file "Thy/thy_output.ML";
 ML_file "Thy/document_antiquotations.ML";
+ML_file "Thy/bibtex.ML";
 ML_file "General/graph_display.ML";
 ML_file "pure_syn.ML";
 ML_file "PIDE/command.ML";
@@ -319,7 +320,6 @@
 ML_file "Tools/build.ML";
 ML_file "Tools/named_thms.ML";
 ML_file "Tools/print_operation.ML";
-ML_file "Tools/bibtex.ML";
 ML_file "Tools/rail.ML";
 ML_file "Tools/rule_insts.ML";
 ML_file "Tools/thm_deps.ML";
diff -r c573cfb2c407 -r 4588f714a78a src/Pure/Thy/bibtex.ML
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/Thy/bibtex.ML	Sun Dec 24 13:07:05 2017 +0100
@@ -0,0 +1,33 @@
+(*  Title:      Pure/Thy/bibtex.ML
+    Author:     Makarius
+
+BibTeX support.
+*)
+
+signature BIBTEX =
+sig
+  val cite_macro: string Config.T
+end;
+
+structure Bibtex: BIBTEX =
+struct
+
+val cite_macro = Attrib.setup_config_string \<^binding>\<open>cite_macro\<close> (K "cite");
+
+val _ =
+  Theory.setup
+   (Thy_Output.add_option \<^binding>\<open>cite_macro\<close> (Config.put cite_macro) #>
+    Thy_Output.antiquotation \<^binding>\<open>cite\<close>
+      (Scan.lift
+        (Scan.option (Parse.verbatim || Parse.cartouche) --
+          Parse.and_list1 (Parse.position Args.name)))
+      (fn {context = ctxt, ...} => fn (opt, citations) =>
+        let
+          val _ =
+            Context_Position.reports ctxt
+              (map (fn (name, pos) => (pos, Markup.citation name)) citations);
+          val opt_arg = (case opt of NONE => "" | SOME s => "[" ^ s ^ "]");
+          val arg = "{" ^ space_implode "," (map #1 citations) ^ "}";
+        in "\\" ^ Config.get ctxt cite_macro ^ opt_arg ^ arg end));
+
+end;
diff -r c573cfb2c407 -r 4588f714a78a src/Pure/Thy/bibtex.scala
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/Thy/bibtex.scala	Sun Dec 24 13:07:05 2017 +0100
@@ -0,0 +1,649 @@
+/*  Title:      Pure/Thy/bibtex.scala
+    Author:     Makarius
+
+BibTeX support.
+*/
+
+package isabelle
+
+
+import scala.collection.mutable
+import scala.util.parsing.combinator.RegexParsers
+import scala.util.parsing.input.Reader
+
+
+object Bibtex
+{
+  /** bibtex errors **/
+
+  def bibtex_errors(dir: Path, root_name: String): List[String] =
+  {
+    val log_path = dir + Path.explode(root_name).ext("blg")
+    if (log_path.is_file) {
+      val Error = """^(.*)---line (\d+) of file (.+)""".r
+      Line.logical_lines(File.read(log_path)).flatMap(line =>
+        line match {
+          case Error(msg, Value.Int(l), file) =>
+            val path = File.standard_path(file)
+            if (Path.is_wellformed(path)) {
+              val pos = Position.Line_File(l, (dir + Path.explode(path)).canonical.implode)
+              Some("Bibtex error" + Position.here(pos) + ":\n  " + msg)
+            }
+            else None
+          case _ => None
+        })
+    }
+    else Nil
+  }
+
+
+
+  /** check database **/
+
+  sealed case class Message(is_error: Boolean, msg: String, pos: Position.T)
+  {
+    def output: Unit =
+      if (is_error)
+        Output.error_message("Bibtex error" + Position.here(pos) + ":\n  " + msg)
+      else Output.warning("Bibtex warning" + Position.here(pos) + ":\n  " + msg)
+  }
+
+  def check_database(bib_file: Path): List[Message] =
+  {
+    val chunks = parse(Line.normalize(File.read(bib_file)))
+
+    val tokens = new mutable.ListBuffer[(Token, Position.T)]
+    var line = 1
+    var offset = 1
+    var chunk_pos = Map.empty[String, Position.T]
+    val file_pos = bib_file.expand.position
+
+    def token_pos(tok: Token): Position.T =
+      Position.Offset(offset) ::: Position.End_Offset(offset + tok.source.length) :::
+      Position.Line(line) ::: file_pos
+
+    def advance_pos(tok: Token)
+    {
+      for (s <- Symbol.iterator(tok.source)) {
+        if (Symbol.is_newline(s)) line += 1
+        offset += 1
+      }
+    }
+
+    def get_line_pos(l: Int): Position.T =
+      if (0 < l && l <= tokens.length) tokens(l - 1)._2 else Position.none
+
+    for (chunk <- chunks) {
+      val name = chunk.name
+      if (name != "" && !chunk_pos.isDefinedAt(name)) {
+        chunk_pos += (name -> token_pos(chunk.tokens.head))
+      }
+      for (tok <- chunk.tokens) {
+        tokens += (tok.copy(source = tok.source.replace("\n", " ")) -> token_pos(tok))
+        advance_pos(tok)
+      }
+    }
+
+    Isabelle_System.with_tmp_dir("bibtex")(tmp_dir =>
+      {
+        File.write(tmp_dir + Path.explode("root.bib"),
+          tokens.iterator.map(p => p._1.source).mkString("", "\n", "\n"))
+        File.write(tmp_dir + Path.explode("root.aux"),
+          "\\bibstyle{plain}\n\\bibdata{root}\n\\citation{*}")
+        Isabelle_System.bash("\"$ISABELLE_BIBTEX\" root", cwd = tmp_dir.file)
+
+        val Error = """^(.*)---line (\d+) of file root.bib$""".r
+        val Warning = """^Warning--(.+)$""".r
+        val Warning_Line = """--line (\d+) of file root.bib$""".r
+        val Warning_in_Chunk = """^Warning--(.+) in (.+)$""".r
+
+        val log_file = tmp_dir + Path.explode("root.blg")
+        val lines = if (log_file.is_file) Line.logical_lines(File.read(log_file)) else Nil
+
+        lines.zip(lines.tail ::: List("")).flatMap(
+          {
+            case (Error(msg, Value.Int(l)), _) =>
+              Some(Message(true, msg, get_line_pos(l)))
+            case (Warning_in_Chunk(msg, name), _) if chunk_pos.isDefinedAt(name) =>
+              Some(Message(false,
+                Word.capitalize(msg + " in entry " + quote(name)), chunk_pos(name)))
+            case (Warning(msg), Warning_Line(Value.Int(l))) =>
+              Some(Message(false, Word.capitalize(msg), get_line_pos(l)))
+            case (Warning(msg), _) =>
+              Some(Message(false, Word.capitalize(msg), Position.none))
+            case _ => None
+          }
+        )
+      })
+  }
+
+
+
+  /** document model **/
+
+  def check_name(name: String): Boolean = name.endsWith(".bib")
+  def check_name(name: Document.Node.Name): Boolean = check_name(name.node)
+
+  def entries(text: String): List[Text.Info[String]] =
+  {
+    val result = new mutable.ListBuffer[Text.Info[String]]
+    var offset = 0
+    for (chunk <- Bibtex.parse(text)) {
+      val end_offset = offset + chunk.source.length
+      if (chunk.name != "" && !chunk.is_command)
+        result += Text.Info(Text.Range(offset, end_offset), chunk.name)
+      offset = end_offset
+    }
+    result.toList
+  }
+
+  def entries_iterator[A, B <: Document.Model](models: Map[A, B])
+    : Iterator[Text.Info[(String, B)]] =
+  {
+    for {
+      (_, model) <- models.iterator
+      info <- model.bibtex_entries.iterator
+    } yield info.map((_, model))
+  }
+
+
+  /* completion */
+
+  def completion[A, B <: Document.Model](
+    history: Completion.History, rendering: Rendering, caret: Text.Offset,
+    models: Map[A, B]): Option[Completion.Result] =
+  {
+    for {
+      Text.Info(r, name) <- rendering.citation(rendering.before_caret_range(caret))
+      name1 <- Completion.clean_name(name)
+
+      original <- rendering.model.get_text(r)
+      original1 <- Completion.clean_name(Library.perhaps_unquote(original))
+
+      entries =
+        (for {
+          Text.Info(_, (entry, _)) <- entries_iterator(models)
+          if entry.toLowerCase.containsSlice(name1.toLowerCase) && entry != original1
+        } yield entry).toList
+      if entries.nonEmpty
+
+      items =
+        entries.sorted.map({
+          case entry =>
+            val full_name = Long_Name.qualify(Markup.CITATION, entry)
+            val description = List(entry, "(BibTeX entry)")
+            val replacement = quote(entry)
+            Completion.Item(r, original, full_name, description, replacement, 0, false)
+        }).sorted(history.ordering).take(rendering.options.int("completion_limit"))
+    } yield Completion.Result(r, original, false, items)
+  }
+
+
+
+  /** content **/
+
+  private val months = List(
+    "jan",
+    "feb",
+    "mar",
+    "apr",
+    "may",
+    "jun",
+    "jul",
+    "aug",
+    "sep",
+    "oct",
+    "nov",
+    "dec")
+  def is_month(s: String): Boolean = months.contains(s.toLowerCase)
+
+  private val commands = List("preamble", "string")
+  def is_command(s: String): Boolean = commands.contains(s.toLowerCase)
+
+  sealed case class Entry(
+    kind: String,
+    required: List[String],
+    optional_crossref: List[String],
+    optional_other: List[String])
+  {
+    def is_required(s: String): Boolean = required.contains(s.toLowerCase)
+    def is_optional(s: String): Boolean =
+      optional_crossref.contains(s.toLowerCase) || optional_other.contains(s.toLowerCase)
+
+    def fields: List[String] = required ::: optional_crossref ::: optional_other
+    def template: String =
+      "@" + kind + "{,\n" + fields.map(x => "  " + x + " = {},\n").mkString + "}\n"
+  }
+
+  val known_entries: List[Entry] =
+    List(
+      Entry("Article",
+        List("author", "title"),
+        List("journal", "year"),
+        List("volume", "number", "pages", "month", "note")),
+      Entry("InProceedings",
+        List("author", "title"),
+        List("booktitle", "year"),
+        List("editor", "volume", "number", "series", "pages", "month", "address",
+          "organization", "publisher", "note")),
+      Entry("InCollection",
+        List("author", "title", "booktitle"),
+        List("publisher", "year"),
+        List("editor", "volume", "number", "series", "type", "chapter", "pages",
+          "edition", "month", "address", "note")),
+      Entry("InBook",
+        List("author", "editor", "title", "chapter"),
+        List("publisher", "year"),
+        List("volume", "number", "series", "type", "address", "edition", "month", "pages", "note")),
+      Entry("Proceedings",
+        List("title", "year"),
+        List(),
+        List("booktitle", "editor", "volume", "number", "series", "address", "month",
+          "organization", "publisher", "note")),
+      Entry("Book",
+        List("author", "editor", "title"),
+        List("publisher", "year"),
List("volume", "number", "series", "address", "edition", "month", "note")), + Entry("Booklet", + List("title"), + List(), + List("author", "howpublished", "address", "month", "year", "note")), + Entry("PhdThesis", + List("author", "title", "school", "year"), + List(), + List("type", "address", "month", "note")), + Entry("MastersThesis", + List("author", "title", "school", "year"), + List(), + List("type", "address", "month", "note")), + Entry("TechReport", + List("author", "title", "institution", "year"), + List(), + List("type", "number", "address", "month", "note")), + Entry("Manual", + List("title"), + List(), + List("author", "organization", "address", "edition", "month", "year", "note")), + Entry("Unpublished", + List("author", "title", "note"), + List(), + List("month", "year")), + Entry("Misc", + List(), + List(), + List("author", "title", "howpublished", "month", "year", "note"))) + + def get_entry(kind: String): Option[Entry] = + known_entries.find(entry => entry.kind.toLowerCase == kind.toLowerCase) + + def is_entry(kind: String): Boolean = get_entry(kind).isDefined + + + + /** tokens and chunks **/ + + object Token + { + object Kind extends Enumeration + { + val COMMAND = Value("command") + val ENTRY = Value("entry") + val KEYWORD = Value("keyword") + val NAT = Value("natural number") + val STRING = Value("string") + val NAME = Value("name") + val IDENT = Value("identifier") + val SPACE = Value("white space") + val COMMENT = Value("ignored text") + val ERROR = Value("bad input") + } + } + + sealed case class Token(kind: Token.Kind.Value, source: String) + { + def is_kind: Boolean = + kind == Token.Kind.COMMAND || + kind == Token.Kind.ENTRY || + kind == Token.Kind.IDENT + def is_name: Boolean = + kind == Token.Kind.NAME || + kind == Token.Kind.IDENT + def is_ignored: Boolean = + kind == Token.Kind.SPACE || + kind == Token.Kind.COMMENT + def is_malformed: Boolean = kind == + Token.Kind.ERROR + } + + case class Chunk(kind: String, tokens: List[Token]) + { + val source = tokens.map(_.source).mkString + + private val content: Option[List[Token]] = + tokens match { + case Token(Token.Kind.KEYWORD, "@") :: body if body.nonEmpty => + (body.init.filterNot(_.is_ignored), body.last) match { + case (tok :: Token(Token.Kind.KEYWORD, "{") :: toks, Token(Token.Kind.KEYWORD, "}")) + if tok.is_kind => Some(toks) + + case (tok :: Token(Token.Kind.KEYWORD, "(") :: toks, Token(Token.Kind.KEYWORD, ")")) + if tok.is_kind => Some(toks) + + case _ => None + } + case _ => None + } + + def name: String = + content match { + case Some(tok :: _) if tok.is_name => tok.source + case _ => "" + } + + def is_ignored: Boolean = kind == "" && tokens.forall(_.is_ignored) + def is_malformed: Boolean = kind == "" || tokens.exists(_.is_malformed) + def is_command: Boolean = Bibtex.is_command(kind) && name != "" && content.isDefined + def is_entry: Boolean = Bibtex.is_entry(kind) && name != "" && content.isDefined + } + + + + /** parsing **/ + + // context of partial line-oriented scans + abstract class Line_Context + case object Ignored extends Line_Context + case object At extends Line_Context + case class Item_Start(kind: String) extends Line_Context + case class Item_Open(kind: String, end: String) extends Line_Context + case class Item(kind: String, end: String, delim: Delimited) extends Line_Context + + case class Delimited(quoted: Boolean, depth: Int) + val Closed = Delimited(false, 0) + + private def token(kind: Token.Kind.Value)(source: String): Token = Token(kind, source) + private def keyword(source: 
+
+
+  // See also http://ctan.org/tex-archive/biblio/bibtex/base/bibtex.web
+  // module @<Scan for and process a \.{.bib} command or database entry@>.
+
+  object Parsers extends RegexParsers
+  {
+    /* white space and comments */
+
+    override val whiteSpace = "".r
+
+    private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE)
+    private val spaces = rep(space)
+
+
+    /* ignored text */
+
+    private val ignored: Parser[Chunk] =
+      rep1("""(?i)([^@]+|@[ \t]*comment)""".r) ^^ {
+        case ss => Chunk("", List(Token(Token.Kind.COMMENT, ss.mkString))) }
+
+    private def ignored_line: Parser[(Chunk, Line_Context)] =
+      ignored ^^ { case a => (a, Ignored) }
+
+
+    /* delimited string: outermost "..." or {...} and body with balanced {...} */
+
+    // see also bibtex.web: scan_a_field_token_and_eat_white, scan_balanced_braces
+    private def delimited_depth(delim: Delimited): Parser[(String, Delimited)] =
+      new Parser[(String, Delimited)]
+      {
+        require(if (delim.quoted) delim.depth > 0 else delim.depth >= 0)
+
+        def apply(in: Input) =
+        {
+          val start = in.offset
+          val end = in.source.length
+
+          var i = start
+          var q = delim.quoted
+          var d = delim.depth
+          var finished = false
+          while (!finished && i < end) {
+            val c = in.source.charAt(i)
+
+            if (c == '"' && d == 0) { i += 1; d = 1; q = true }
+            else if (c == '"' && d == 1 && q) {
+              i += 1; d = 0; q = false; finished = true
+            }
+            else if (c == '{') { i += 1; d += 1 }
+            else if (c == '}') {
+              if (d == 1 && !q || d > 1) { i += 1; d -= 1; if (d == 0) finished = true }
+              else { i = start; finished = true }
+            }
+            else if (d > 0) i += 1
+            else finished = true
+          }
+          if (i == start) Failure("bad input", in)
+          else {
+            val s = in.source.subSequence(start, i).toString
+            Success((s, Delimited(q, d)), in.drop(i - start))
+          }
+        }
+      }.named("delimited_depth")
+
+    private def delimited: Parser[Token] =
+      delimited_depth(Closed) ^?
+        { case (s, delim) if delim == Closed => Token(Token.Kind.STRING, s) }
+
+    private def recover_delimited: Parser[Token] =
+      """["{][^@]*""".r ^^ token(Token.Kind.ERROR)
+
+    def delimited_line(ctxt: Item): Parser[(Chunk, Line_Context)] =
+      delimited_depth(ctxt.delim) ^^ { case (s, delim1) =>
+        (Chunk(ctxt.kind, List(Token(Token.Kind.STRING, s))), ctxt.copy(delim = delim1)) } |
+      recover_delimited ^^ { case a => (Chunk(ctxt.kind, List(a)), Ignored) }
+
+
+    /* other tokens */
+
+    private val at = "@" ^^ keyword
+
+    private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT)
+
+    private val name = """[\x21-\x7f&&[^"#%'(),={}]]+""".r ^^ token(Token.Kind.NAME)
+
+    private val identifier =
+      """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r
+
+    private val ident = identifier ^^ token(Token.Kind.IDENT)
+
+    val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space))
+
+
+    /* body */
+
+    private val body =
+      delimited | (recover_delimited | other_token)
+
+    private def body_line(ctxt: Item) =
+      if (ctxt.delim.depth > 0)
+        delimited_line(ctxt)
+      else
+        delimited_line(ctxt) |
+        other_token ^^ { case a => (Chunk(ctxt.kind, List(a)), ctxt) } |
+        ctxt.end ^^ { case a => (Chunk(ctxt.kind, List(keyword(a))), Ignored) }
+
+
+    /* items: command or entry */
+
+    private val item_kind =
+      identifier ^^ { case a =>
+        val kind =
+          if (is_command(a)) Token.Kind.COMMAND
+          else if (is_entry(a)) Token.Kind.ENTRY
+          else Token.Kind.IDENT
+        Token(kind, a)
+      }
+
+    private val item_begin =
+      "{" ^^ { case a => ("}", keyword(a)) } |
+      "(" ^^ { case a => (")", keyword(a)) }
+
+    private def item_name(kind: String) =
+      kind.toLowerCase match {
+        case "preamble" => failure("")
+        case "string" => identifier ^^ token(Token.Kind.NAME)
+        case _ => name
+      }
+
+    private val item_start =
+      at ~ spaces ~ item_kind ~ spaces ^^
+        { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) }
+
+    private val item: Parser[Chunk] =
+      (item_start ~ item_begin ~ spaces) into
+        { case (kind, a) ~ ((end, b)) ~ c =>
+            opt(item_name(kind)) ~ rep(body) ~ opt(end ^^ keyword) ^^ {
+              case d ~ e ~ f => Chunk(kind, a ::: List(b) ::: c ::: d.toList ::: e ::: f.toList) } }
+
+    private val recover_item: Parser[Chunk] =
+      at ~ "[^@]*".r ^^ { case a ~ b => Chunk("", List(a, Token(Token.Kind.ERROR, b))) }
+
+
+    /* chunks */
+
+    val chunk: Parser[Chunk] = ignored | (item | recover_item)
+
+    def chunk_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] =
+    {
+      ctxt match {
+        case Ignored =>
+          ignored_line |
+          at ^^ { case a => (Chunk("", List(a)), At) }
+
+        case At =>
+          space ^^ { case a => (Chunk("", List(a)), ctxt) } |
+          item_kind ^^ { case a => (Chunk(a.source, List(a)), Item_Start(a.source)) } |
+          recover_item ^^ { case a => (a, Ignored) } |
+          ignored_line
+
+        case Item_Start(kind) =>
+          space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
+          item_begin ^^ { case (end, a) => (Chunk(kind, List(a)), Item_Open(kind, end)) } |
+          recover_item ^^ { case a => (a, Ignored) } |
+          ignored_line
+
+        case Item_Open(kind, end) =>
+          space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
+          item_name(kind) ^^ { case a => (Chunk(kind, List(a)), Item(kind, end, Closed)) } |
+          body_line(Item(kind, end, Closed)) |
+          ignored_line
+
+        case item_ctxt: Item =>
+          body_line(item_ctxt) |
+          ignored_line
+
+        case _ => failure("")
+      }
+    }
+  }
+
+
+  /* parse */
+
+  def parse(input: CharSequence): List[Chunk] =
+    Parsers.parseAll(Parsers.rep(Parsers.chunk), Scan.char_reader(input)) match {
+      case Parsers.Success(result, _) => result
+      case _ => error("Unexpected failure to parse input:\n" + input.toString)
+    }
+
+  def parse_line(input: CharSequence, context: Line_Context): (List[Chunk], Line_Context) =
+  {
+    var in: Reader[Char] = Scan.char_reader(input)
+    val chunks = new mutable.ListBuffer[Chunk]
+    var ctxt = context
+    while (!in.atEnd) {
+      Parsers.parse(Parsers.chunk_line(ctxt), in) match {
+        case Parsers.Success((x, c), rest) => chunks += x; ctxt = c; in = rest
+        case Parsers.NoSuccess(_, rest) =>
+          error("Unexpected failure to parse input:\n" + rest.source.toString)
+      }
+    }
+    (chunks.toList, ctxt)
+  }
+
+
+
+  /** HTML output **/
+
+  private val output_styles =
+    List(
+      "" -> "html-n",
+      "plain" -> "html-n",
+      "alpha" -> "html-a",
+      "named" -> "html-n",
+      "paragraph" -> "html-n",
+      "unsort" -> "html-u",
+      "unsortlist" -> "html-u")
+
+  def html_output(bib: List[Path],
+    title: String = "Bibliography",
+    body: Boolean = false,
+    citations: List[String] = List("*"),
+    style: String = "",
+    chronological: Boolean = false): String =
+  {
+    Isabelle_System.with_tmp_dir("bibtex")(tmp_dir =>
+      {
+        /* database files */
+
+        val bib_files = bib.map(path => path.split_ext._1)
+        val bib_names =
+        {
+          val names0 = bib_files.map(_.base_name)
+          if (Library.duplicates(names0).isEmpty) names0
+          else names0.zipWithIndex.map({ case (name, i) => (i + 1).toString + "-" + name })
+        }
+
+        for ((a, b) <- bib_files zip bib_names) {
+          File.copy(a.ext("bib"), tmp_dir + Path.basic(b).ext("bib"))
+        }
+
+
+        /* style file */
+
+        val bst =
+          output_styles.toMap.get(style) match {
+            case Some(base) => base + (if (chronological) "c" else "") + ".bst"
+            case None =>
+              error("Bad style for bibtex HTML output: " + quote(style) +
+                "\n(expected: " + commas_quote(output_styles.map(_._1)) + ")")
+          }
+        File.copy(Path.explode("$BIB2XHTML_HOME/bst") + Path.explode(bst), tmp_dir)
+
+
+        /* result */
+
+        val in_file = Path.explode("bib.aux")
+        val out_file = Path.explode("bib.html")
+
+        File.write(tmp_dir + in_file,
+          bib_names.mkString("\\bibdata{", ",", "}\n") +
+          citations.map(cite => "\\citation{" + cite + "}\n").mkString)
+
+        Isabelle_System.bash(
+          "\"$BIB2XHTML_HOME/main/bib2xhtml.pl\" -B \"$ISABELLE_BIBTEX\"" +
+          " -u -s " + Bash.string(proper_string(style) getOrElse "empty") +
+          (if (chronological) " -c" else "") +
+          (if (title != "") " -h " + Bash.string(title) + " " else "") +
+          " " + File.bash_path(in_file) + " " + File.bash_path(out_file),
+          cwd = tmp_dir.file).check
+
+        val html = File.read(tmp_dir + out_file)
+
+        if (body) {
+          cat_lines(
+            split_lines(html).
+              dropWhile(line => !line.startsWith("
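
Usage sketch (not part of the changeset): the new Isabelle/Scala API added by
this patch can be exercised roughly as follows, in an Isabelle/Scala context;
the database name "example.bib" is hypothetical.

  // Minimal sketch, assuming the patched Bibtex module is on the classpath
  // and a BibTeX database "example.bib" (hypothetical name) exists.
  val bib = Path.explode("example.bib")
  val text = File.read(bib)

  // Tokenize the database into chunks; entries() keeps only named
  // database entries, each with its text range.
  for (Text.Info(range, name) <- Bibtex.entries(text))
    println("entry " + quote(name) + " at " + range.toString)

  // Run the actual "bibtex" executable over the database and print its
  // errors and warnings via Message.output.
  Bibtex.check_database(bib).foreach(_.output)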