/*  Title:      Pure/General/bibtex.scala
    Author:     Makarius

Support for BibTeX.
*/

package isabelle


import java.io.{File => JFile}

import scala.collection.mutable
import scala.util.parsing.combinator.RegexParsers
import scala.util.parsing.input.Reader
import scala.util.matching.Regex

import isabelle.{Token => Isar_Token}


object Bibtex {
  /** file format **/

  class File_Format extends isabelle.File_Format {
    val format_name: String = "bibtex"
    val file_ext: String = "bib"

    override def parse_data(name: String, text: String): Bibtex.Entries =
      Bibtex.Entries.parse(text, Isar_Token.Pos.file(name))

    override def theory_suffix: String = "bibtex_file"
    override def theory_content(name: String): String =
      """theory "bib" imports Pure begin bibtex_file "." end"""
    override def theory_excluded(name: String): Boolean = name == "bib"

    override def html_document(snapshot: Document.Snapshot): Option[Browser_Info.HTML_Document] = {
      val name = snapshot.node_name
      if (detect(name.node)) {
        val title = "Bibliography " + quote(snapshot.node_name.file_name)
        val content =
          Isabelle_System.with_tmp_file("bib", "bib") { bib =>
            File.write(bib, snapshot.node.source)
            Bibtex.html_output(List(bib), style = "unsort", title = title)
          }
        Some(Browser_Info.HTML_Document(title, content))
      }
      else None
    }
  }


  /** bibtex errors **/

  def bibtex_errors(dir: Path, root_name: String): List[String] = {
    val log_path = dir + Path.explode(root_name).blg
    if (log_path.is_file) {
      val Error1 = """^(I couldn't open database file .+)$""".r
      val Error2 = """^(I found no .+)$""".r
      val Error3 = """^(.+)---line (\d+) of file (.+)""".r
      Line.logical_lines(File.read(log_path)).flatMap(line =>
        line match {
          case Error1(msg) => Some("Bibtex error: " + msg)
          case Error2(msg) => Some("Bibtex error: " + msg)
          case Error3(msg, Value.Int(l), file) =>
            val path = File.standard_path(file)
            if (Path.is_wellformed(path)) {
              val pos = Position.Line_File(l, (dir + Path.explode(path)).canonical.implode)
              Some("Bibtex error" + Position.here(pos) + ":\n  " + msg)
            }
            else None
          case _ => None
        })
    }
    else Nil
  }


  /** check database **/

  def check_database(database: String): (List[(String, Position.T)], List[(String, Position.T)]) = {
    val chunks = parse(Line.normalize(database))
    var chunk_pos = Map.empty[String, Position.T]
    val tokens = new mutable.ListBuffer[(Token, Position.T)]
    var line = 1
    var offset = 1

    def make_pos(length: Int): Position.T =
      Position.Offset(offset) ::: Position.End_Offset(offset + length) ::: Position.Line(line)

    def advance_pos(tok: Token): Unit = {
      for (s <- Symbol.iterator(tok.source)) {
        if (Symbol.is_newline(s)) line += 1
        offset += 1
      }
    }

    def get_line_pos(l: Int): Position.T =
      if (0 < l && l <= tokens.length) tokens(l - 1)._2 else Position.none

    for (chunk <- chunks) {
      val name = chunk.name
      if (name != "" && !chunk_pos.isDefinedAt(name)) {
        chunk_pos += (name -> make_pos(chunk.heading_length))
      }
      for (tok <- chunk.tokens) {
        tokens += (tok.copy(source = tok.source.replace("\n", " ")) -> make_pos(tok.source.length))
        advance_pos(tok)
      }
    }

    Isabelle_System.with_tmp_dir("bibtex") { tmp_dir =>
      File.write(tmp_dir + Path.explode("root.bib"),
        tokens.iterator.map(p => p._1.source).mkString("", "\n", "\n"))
      File.write(tmp_dir + Path.explode("root.aux"),
        "\\bibstyle{plain}\n\\bibdata{root}\n\\citation{*}")
      Isabelle_System.bash("\"$ISABELLE_BIBTEX\" root", cwd = tmp_dir.file)

      val Error = """^(.*)---line (\d+) of file root.bib$""".r
      val Warning = """^Warning--(.+)$""".r
      val Warning_Line = """--line (\d+) of file root.bib$""".r
      val Warning_in_Chunk = """^Warning--(.+) in (.+)$""".r

      val log_file = tmp_dir + Path.explode("root.blg")
      val lines = if (log_file.is_file) Line.logical_lines(File.read(log_file)) else Nil

      val (errors, warnings) =
        if (lines.isEmpty) (Nil, Nil)
        else {
          lines.zip(lines.tail ::: List("")).flatMap(
            {
              case (Error(msg, Value.Int(l)), _) =>
                Some((true, (msg, get_line_pos(l))))
              case (Warning_in_Chunk(msg, name), _) if chunk_pos.isDefinedAt(name) =>
                Some((false, (Word.capitalized(msg) + " in entry " + quote(name), chunk_pos(name))))
              case (Warning(msg), Warning_Line(Value.Int(l))) =>
                Some((false, (Word.capitalized(msg), get_line_pos(l))))
              case (Warning(msg), _) =>
                Some((false, (Word.capitalized(msg), Position.none)))
              case _ => None
            }
          ).partition(_._1)
        }
      (errors.map(_._2), warnings.map(_._2))
    }
  }

  object Check_Database extends Scala.Fun_String("bibtex_check_database") {
    val here = Scala_Project.here
    def apply(database: String): String = {
      import XML.Encode._
      YXML.string_of_body(pair(list(pair(string, properties)), list(pair(string, properties)))(
        check_database(database)))
    }
  }
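
  /* Usage sketch (hypothetical call, assuming an Isabelle/Scala context with
     $ISABELLE_BIBTEX configured): check_database runs the external bibtex tool on the
     given database text and splits the resulting log into errors and warnings, each
     paired with a source position.

       val (errors, warnings) =
         Bibtex.check_database("@Article{key, author = {A. Author}, title = {T}}")
       errors.foreach { case (msg, pos) => println(msg + Position.here(pos)) }
  */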


  /** document model **/

  /* entries */

  sealed case class Entry(name: String, pos: Position.T) {
    def encode: String = YXML.string_of_body(XML.Encode.properties(Markup.Name(name) ::: pos))
  }

  object Entries {
    val empty: Entries = apply(Nil)

    def apply(entries: List[Entry], errors: List[String] = Nil): Entries =
      new Entries(entries, errors)

    def parse(text: String, start: Isar_Token.Pos = Isar_Token.Pos.start): Entries = {
      val entries = new mutable.ListBuffer[Entry]
      var pos = start
      var err_line = 0

      try {
        for (chunk <- Bibtex.parse(text)) {
          val pos1 = pos.advance(chunk.source)
          if (chunk.name != "" && !chunk.is_command) {
            entries += Entry(chunk.name, pos.position(pos1.offset))
          }
          if (chunk.is_malformed && err_line == 0) { err_line = pos.line }
          pos = pos1
        }

        val err_pos =
          if (err_line == 0 || pos.file.isEmpty) Position.none
          else Position.Line_File(err_line, pos.file)
        val errors =
          if (err_line == 0) Nil
          else List("Malformed bibtex file" + Position.here(err_pos))

        apply(entries.toList, errors = errors)
      }
      catch { case ERROR(msg) => apply(Nil, errors = List(msg)) }
    }
  }

  final class Entries private(val entries: List[Entry], val errors: List[String]) {
    override def toString: String = "Bibtex.Entries(" + entries.length + ")"

    def ::: (other: Entries): Entries =
      new Entries(other.entries ::: entries, other.errors ::: errors)
  }

  object Session_Entries extends Scala.Fun("bibtex_session_entries") {
    val here = Scala_Project.here

    override def invoke(session: Session, args: List[Bytes]): List[Bytes] = {
      val sessions = session.resources.sessions_structure
      val id = Value.Long.parse(Library.the_single(args).text)
      val qualifier =
        session.get_state().lookup_id(id) match {
          case None => Sessions.DRAFT
          case Some(st) => sessions.theory_qualifier(st.command.node_name.theory)
        }
      val res =
        if (qualifier == Sessions.DRAFT || !sessions.defined(qualifier)) Nil
        else qualifier :: sessions(qualifier).bibtex_entries.entries.map(_.encode)
      res.map(Bytes.apply)
    }
  }
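
  /* Usage sketch (hypothetical input): Entries.parse scans raw .bib source without a
     formal context and collects entry names with positions; malformed chunks are
     reported via the errors field instead of raising an exception.

       val entries = Bibtex.Entries.parse("@Article{Smith2020, author = {Smith}, title = {T}}")
       // entries.entries.map(_.name) is expected to yield List("Smith2020")
  */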


  /** content **/

  private val months = List(
    "jan",
    "feb",
    "mar",
    "apr",
    "may",
    "jun",
    "jul",
    "aug",
    "sep",
    "oct",
    "nov",
    "dec")
  def is_month(s: String): Boolean = months.contains(s.toLowerCase)

  private val commands = List("preamble", "string")
  def is_command(s: String): Boolean = commands.contains(s.toLowerCase)

  sealed case class Entry_Type(
    kind: String,
    required: List[String],
    optional_crossref: List[String],
    optional_other: List[String]
  ) {
    val optional_standard: List[String] = List("url", "doi", "ee")

    def is_required(s: String): Boolean = required.contains(s.toLowerCase)
    def is_optional(s: String): Boolean =
      optional_crossref.contains(s.toLowerCase) ||
      optional_other.contains(s.toLowerCase) ||
      optional_standard.contains(s.toLowerCase)

    def fields: List[String] =
      required ::: optional_crossref ::: optional_other ::: optional_standard

    def template: String =
      "@" + kind + "{,\n" + fields.map(x => "  " + x + " = {},\n").mkString + "}\n"
  }

  val known_entries: List[Entry_Type] =
    List(
      Entry_Type("Article",
        List("author", "title"),
        List("journal", "year"),
        List("volume", "number", "pages", "month", "note")),
      Entry_Type("InProceedings",
        List("author", "title"),
        List("booktitle", "year"),
        List("editor", "volume", "number", "series", "pages", "month", "address",
          "organization", "publisher", "note")),
      Entry_Type("InCollection",
        List("author", "title", "booktitle"),
        List("publisher", "year"),
        List("editor", "volume", "number", "series", "type", "chapter", "pages",
          "edition", "month", "address", "note")),
      Entry_Type("InBook",
        List("author", "editor", "title", "chapter"),
        List("publisher", "year"),
        List("volume", "number", "series", "type", "address", "edition", "month", "pages", "note")),
      Entry_Type("Proceedings",
        List("title", "year"),
        List(),
        List("booktitle", "editor", "volume", "number", "series", "address", "month",
          "organization", "publisher", "note")),
      Entry_Type("Book",
        List("author", "editor", "title"),
        List("publisher", "year"),
        List("volume", "number", "series", "address", "edition", "month", "note")),
      Entry_Type("Booklet",
        List("title"),
        List(),
        List("author", "howpublished", "address", "month", "year", "note")),
      Entry_Type("PhdThesis",
        List("author", "title", "school", "year"),
        List(),
        List("type", "address", "month", "note")),
      Entry_Type("MastersThesis",
        List("author", "title", "school", "year"),
        List(),
        List("type", "address", "month", "note")),
      Entry_Type("TechReport",
        List("author", "title", "institution", "year"),
        List(),
        List("type", "number", "address", "month", "note")),
      Entry_Type("Manual",
        List("title"),
        List(),
        List("author", "organization", "address", "edition", "month", "year", "note")),
      Entry_Type("Unpublished",
        List("author", "title", "note"),
        List(),
        List("month", "year")),
      Entry_Type("Misc",
        List(),
        List(),
        List("author", "title", "howpublished", "month", "year", "note")))

  def known_entry(kind: String): Option[Entry_Type] =
    known_entries.find(entry => entry.kind.toLowerCase == kind.toLowerCase)
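
  /* Sketch of template output (derived from the fields above, formatting approximate):
     for the "Unpublished" kind, Entry_Type.template produces roughly

       @Unpublished{,
         author = {},
         title = {},
         note = {},
         month = {},
         year = {},
         url = {},
         doi = {},
         ee = {},
       }
  */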


  /** tokens and chunks **/

  object Token {
    object Kind extends Enumeration {
      val COMMAND = Value("command")
      val ENTRY = Value("entry")
      val KEYWORD = Value("keyword")
      val NAT = Value("natural number")
      val STRING = Value("string")
      val NAME = Value("name")
      val IDENT = Value("identifier")
      val SPACE = Value("white space")
      val COMMENT = Value("ignored text")
      val ERROR = Value("bad input")
    }
  }

  sealed case class Token(kind: Token.Kind.Value, source: String) {
    def is_kind: Boolean =
      kind == Token.Kind.COMMAND ||
      kind == Token.Kind.ENTRY ||
      kind == Token.Kind.IDENT
    def is_name: Boolean =
      kind == Token.Kind.NAME ||
      kind == Token.Kind.IDENT
    def is_ignored: Boolean =
      kind == Token.Kind.SPACE ||
      kind == Token.Kind.COMMENT
    def is_malformed: Boolean =
      kind == Token.Kind.ERROR
    def is_open: Boolean =
      kind == Token.Kind.KEYWORD && (source == "{" || source == "(")
  }

  case class Chunk(kind: String, tokens: List[Token]) {
    val source: String = tokens.map(_.source).mkString

    private val content: Option[List[Token]] =
      tokens match {
        case Token(Token.Kind.KEYWORD, "@") :: body if body.nonEmpty =>
          (body.init.filterNot(_.is_ignored), body.last) match {
            case (tok :: Token(Token.Kind.KEYWORD, "{") :: toks, Token(Token.Kind.KEYWORD, "}"))
            if tok.is_kind => Some(toks)

            case (tok :: Token(Token.Kind.KEYWORD, "(") :: toks, Token(Token.Kind.KEYWORD, ")"))
            if tok.is_kind => Some(toks)

            case _ => None
          }
        case _ => None
      }

    def name: String =
      content match {
        case Some(tok :: _) if tok.is_name => tok.source
        case _ => ""
      }

    def heading_length: Int =
      if (name == "") 1
      else {
        tokens.takeWhile(tok => !tok.is_open).foldLeft(0) { case (n, tok) => n + tok.source.length }
      }

    def is_ignored: Boolean = kind == "" && tokens.forall(_.is_ignored)
    def is_malformed: Boolean = tokens.exists(_.is_malformed)
    def is_command: Boolean = Bibtex.is_command(kind) && name != "" && content.isDefined
    def is_entry: Boolean = Bibtex.known_entry(kind).isDefined && name != "" && content.isDefined
  }
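
  /* Sketch (hypothetical input): a well-formed entry parses into a single Chunk whose
     tokens retain the complete source text.

       val chunks = Bibtex.parse("@Article{key, title = {T}}")
       // chunks.filter(_.is_entry).map(_.name) is expected to yield List("key")
       // and chunks.map(_.source).mkString reproduces the input verbatim
  */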


  /** parsing **/

  // context of partial line-oriented scans
  abstract class Line_Context
  case object Ignored extends Line_Context
  case object At extends Line_Context
  case class Item_Start(kind: String) extends Line_Context
  case class Item_Open(kind: String, end: String) extends Line_Context
  case class Item(kind: String, end: String, delim: Delimited) extends Line_Context

  case class Delimited(quoted: Boolean, depth: Int)
  val Closed: Delimited = Delimited(false, 0)

  private def token(kind: Token.Kind.Value)(source: String): Token = Token(kind, source)
  private def keyword(source: String): Token = Token(Token.Kind.KEYWORD, source)


  // See also https://ctan.org/tex-archive/biblio/bibtex/base/bibtex.web
  // module @<Scan for and process a \.{.bib} command or database entry@>.

  object Parsers extends RegexParsers {
    /* white space and comments */

    override val whiteSpace = "".r

    private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE)
    private val spaces = rep(space)


    /* ignored text */

    private val ignored: Parser[Chunk] =
      rep1("""(?i)([^@]+|@[ \t]*comment)""".r) ^^ {
        case ss => Chunk("", List(Token(Token.Kind.COMMENT, ss.mkString))) }

    private def ignored_line: Parser[(Chunk, Line_Context)] =
      ignored ^^ { case a => (a, Ignored) }


    /* delimited string: outermost "..." or {...} and body with balanced {...} */

    // see also bibtex.web: scan_a_field_token_and_eat_white, scan_balanced_braces
    private def delimited_depth(delim: Delimited): Parser[(String, Delimited)] =
      new Parser[(String, Delimited)] {
        require(if (delim.quoted) delim.depth > 0 else delim.depth >= 0,
          "bad delimiter depth")

        def apply(in: Input): ParseResult[(String, Delimited)] = {
          val start = in.offset
          val end = in.source.length

          var i = start
          var q = delim.quoted
          var d = delim.depth
          var finished = false
          while (!finished && i < end) {
            val c = in.source.charAt(i)

            if (c == '"' && d == 0) { i += 1; d = 1; q = true }
            else if (c == '"' && d == 1 && q) {
              i += 1; d = 0; q = false; finished = true
            }
            else if (c == '{') { i += 1; d += 1 }
            else if (c == '}') {
              if (d == 1 && !q || d > 1) { i += 1; d -= 1; if (d == 0) finished = true }
              else { i = start; finished = true }
            }
            else if (d > 0) i += 1
            else finished = true
          }
          if (i == start) Failure("bad input", in)
          else {
            val s = in.source.subSequence(start, i).toString
            Success((s, Delimited(q, d)), in.drop(i - start))
          }
        }
      }.named("delimited_depth")

    private def delimited: Parser[Token] =
      delimited_depth(Closed) ^?
        { case (s, delim) if delim == Closed => Token(Token.Kind.STRING, s) }

    private def recover_delimited: Parser[Token] =
      """["{][^@]*""".r ^^ token(Token.Kind.ERROR)

    def delimited_line(ctxt: Item): Parser[(Chunk, Line_Context)] =
      delimited_depth(ctxt.delim) ^^ { case (s, delim1) =>
        (Chunk(ctxt.kind, List(Token(Token.Kind.STRING, s))), ctxt.copy(delim = delim1)) } |
      recover_delimited ^^ { case a => (Chunk(ctxt.kind, List(a)), Ignored) }


    /* other tokens */

    private val at = "@" ^^ keyword

    private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT)

    private val name = """[\x21-\x7f&&[^"#%'(),={}]]+""".r ^^ token(Token.Kind.NAME)

    private val identifier =
      """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r

    private val ident = identifier ^^ token(Token.Kind.IDENT)

    val other_token: Parser[Token] = "[=#,]".r ^^ keyword | (nat | (ident | space))


    /* body */

    private val body =
      delimited | (recover_delimited | other_token)

    private def body_line(ctxt: Item) =
      if (ctxt.delim.depth > 0)
        delimited_line(ctxt)
      else
        delimited_line(ctxt) |
        other_token ^^ { case a => (Chunk(ctxt.kind, List(a)), ctxt) } |
        ctxt.end ^^ { case a => (Chunk(ctxt.kind, List(keyword(a))), Ignored) }


    /* items: command or entry */

    private val item_kind =
      identifier ^^ { case a =>
        val kind =
          if (is_command(a)) Token.Kind.COMMAND
          else if (known_entry(a).isDefined) Token.Kind.ENTRY
          else Token.Kind.IDENT
        Token(kind, a)
      }

    private val item_begin =
      "{" ^^ { case a => ("}", keyword(a)) } |
      "(" ^^ { case a => (")", keyword(a)) }

    private def item_name(kind: String) =
      kind.toLowerCase match {
        case "preamble" => failure("")
        case "string" => identifier ^^ token(Token.Kind.NAME)
        case _ => name
      }

    private val item_start =
      at ~ spaces ~ item_kind ~ spaces ^^
        { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) }

    private val item: Parser[Chunk] =
      (item_start ~ item_begin ~ spaces) into
        { case (kind, a) ~ ((end, b)) ~ c =>
            opt(item_name(kind)) ~ rep(body) ~ opt(end ^^ keyword) ^^ {
              case d ~ e ~ f => Chunk(kind, a ::: List(b) ::: c ::: d.toList ::: e ::: f.toList) } }

    private val recover_item: Parser[Chunk] =
      at ~ "[^@]*".r ^^ { case a ~ b => Chunk("", List(a, Token(Token.Kind.ERROR, b))) }


    /* chunks */

    val chunk: Parser[Chunk] = ignored | (item | recover_item)

    def chunk_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] = {
      ctxt match {
        case Ignored =>
          ignored_line |
          at ^^ { case a => (Chunk("", List(a)), At) }

        case At =>
          space ^^ { case a => (Chunk("", List(a)), ctxt) } |
          item_kind ^^ { case a => (Chunk(a.source, List(a)), Item_Start(a.source)) } |
          recover_item ^^ { case a => (a, Ignored) } |
          ignored_line

        case Item_Start(kind) =>
          space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
          item_begin ^^ { case (end, a) => (Chunk(kind, List(a)), Item_Open(kind, end)) } |
          recover_item ^^ { case a => (a, Ignored) } |
          ignored_line

        case Item_Open(kind, end) =>
          space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
          item_name(kind) ^^ { case a => (Chunk(kind, List(a)), Item(kind, end, Closed)) } |
          body_line(Item(kind, end, Closed)) |
          ignored_line

        case item_ctxt: Item =>
          body_line(item_ctxt) |
          ignored_line

        case _ => failure("")
      }
    }
  }

  /* parse */

  def parse(input: CharSequence): List[Chunk] =
    Parsers.parseAll(Parsers.rep(Parsers.chunk), Scan.char_reader(input)) match {
      case Parsers.Success(result, _) => result
      case _ => error("Unexpected failure to parse input:\n" + input.toString)
    }

  def parse_line(input: CharSequence, context: Line_Context): (List[Chunk], Line_Context) = {
    var in: Reader[Char] = Scan.char_reader(input)
    val chunks = new mutable.ListBuffer[Chunk]
    var ctxt = context
    while (!in.atEnd) {
      val result = Parsers.parse(Parsers.chunk_line(ctxt), in)
      (result : @unchecked) match {
        case Parsers.Success((x, c), rest) => chunks += x; ctxt = c; in = rest
        case Parsers.NoSuccess(_, rest) =>
          error("Unexpected failure to parse input:\n" + rest.source.toString)
      }
    }
    (chunks.toList, ctxt)
  }
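
  /* Sketch of line-oriented use (hypothetical input, starting from the Ignored context,
     as in incremental editor tokenization):

       val (chunks1, ctxt1) = Bibtex.parse_line("@Article{key,", Bibtex.Ignored)
       val (chunks2, ctxt2) = Bibtex.parse_line("  title = {T}}", ctxt1)
       // ctxt2 is expected to be Ignored again once the closing "}" has been seen
  */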


  /** HTML output **/

  private val output_styles =
    List(
      "" -> "html-n",
      "plain" -> "html-n",
      "alpha" -> "html-a",
      "named" -> "html-n",
      "paragraph" -> "html-n",
      "unsort" -> "html-u",
      "unsortlist" -> "html-u")

  def html_output(bib: List[Path],
    title: String = "Bibliography",
    body: Boolean = false,
    citations: List[String] = List("*"),
    style: String = "",
    chronological: Boolean = false
  ): String = {
    Isabelle_System.with_tmp_dir("bibtex") { tmp_dir =>
      /* database files */

      val bib_files = bib.map(_.drop_ext)
      val bib_names = {
        val names0 = bib_files.map(_.file_name)
        if (Library.duplicates(names0).isEmpty) names0
        else names0.zipWithIndex.map({ case (name, i) => (i + 1).toString + "-" + name })
      }

      for ((a, b) <- bib_files zip bib_names) {
        Isabelle_System.copy_file(a.bib, tmp_dir + Path.basic(b).bib)
      }


      /* style file */

      val bst =
        output_styles.toMap.get(style) match {
          case Some(base) => base + (if (chronological) "c" else "") + ".bst"
          case None =>
            error("Bad style for bibtex HTML output: " + quote(style) +
              "\n(expected: " + commas_quote(output_styles.map(_._1)) + ")")
        }
      Isabelle_System.copy_file(Path.explode("$BIB2XHTML_HOME/bst") + Path.explode(bst), tmp_dir)


      /* result */

      val in_file = Path.explode("bib.aux")
      val out_file = Path.explode("bib.html")

      File.write(tmp_dir + in_file,
        bib_names.mkString("\\bibdata{", ",", "}\n") +
        citations.map(cite => "\\citation{" + cite + "}\n").mkString)

      Isabelle_System.bash(
        "\"$BIB2XHTML_HOME/main/bib2xhtml.pl\" -B \"$ISABELLE_BIBTEX\"" +
        " -u -s " + Bash.string(proper_string(style) getOrElse "empty") +
        (if (chronological) " -c" else "") +
        if_proper(title, " -h " + Bash.string(title) + " ") +
        " " + File.bash_path(in_file) + " " + File.bash_path(out_file),
        cwd = tmp_dir.file).check

      val html = File.read(tmp_dir + out_file)

      if (body) {
        cat_lines(
          split_lines(html).
            dropWhile(line => !line.startsWith("<!-- BEGIN BIBLIOGRAPHY")).reverse.
            dropWhile(line => !line.startsWith("<!-- END BIBLIOGRAPHY")).reverse)
      }
      else html
    }
  }
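
  /* Usage sketch (assuming bib2xhtml and bibtex are available via $BIB2XHTML_HOME and
     $ISABELLE_BIBTEX, as in a regular Isabelle installation; the file name is
     hypothetical):

       val html = Bibtex.html_output(List(Path.explode("references.bib")), style = "unsort")
  */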


  /** cite commands and antiquotations **/

  /* cite commands */

  def cite_commands(options: Options): List[String] =
    space_explode(',', options.string("document_cite_commands"))

  val CITE = "cite"
  val NOCITE = "nocite"


  /* citations within theory source */

  object Cite {
    def unapply(tree: XML.Tree): Option[Inner] =
      tree match {
        case XML.Elem(Markup(Markup.Latex_Cite.name, props), body) =>
          val kind = Markup.Kind.unapply(props).getOrElse(CITE)
          val citations = Markup.Citations.get(props)
          val pos = props.filter(Markup.position_property)
          Some(Inner(kind, citations, body, pos))
        case _ => None
      }

    sealed case class Inner(kind: String, citations: String, location: XML.Body, pos: Position.T) {
      def nocite: Inner = copy(kind = NOCITE, location = Nil)

      def latex_text: Latex.Text = {
        val props =
          (if (kind.nonEmpty) Markup.Kind(kind) else Nil) :::
          Markup.Citations(citations) ::: pos
        List(XML.Elem(Markup.Latex_Cite(props), location))
      }

      override def toString: String = citations
    }

    sealed case class Outer(kind: String, body: String, start: Isar_Token.Pos) {
      def pos: Position.T = start.position()

      def parse: Option[Inner] = {
        val tokens = Isar_Token.explode(Parsers.keywords, body)
        Parsers.parse_all(Parsers.inner(pos), Isar_Token.reader(tokens, start)) match {
          case Parsers.Success(res, _) => Some(res)
          case _ => None
        }
      }

      def errors: List[String] =
        if (parse.isDefined) Nil
        else List("Malformed citation" + Position.here(pos))

      override def toString: String =
        parse match {
          case Some(inner) => inner.toString
          case None => "<malformed>"
        }
    }

    object Parsers extends Parse.Parsers {
      val keywords: Keyword.Keywords = Thy_Header.bootstrap_keywords + "in" + "using"

      val location: Parser[String] = embedded ~ $$$("in") ^^ { case x ~ _ => x } | success("")
      val citations: Parser[String] = rep1sep(name, $$$("and")) ^^ (x => x.mkString(","))
      val kind: Parser[String] = $$$("using") ~! name ^^ { case _ ~ x => x } | success(CITE)

      def inner(pos: Position.T): Parser[Inner] =
        location ~ citations ~ kind ^^
          { case x ~ y ~ z => Inner(z, y, XML.string(x), pos) }
    }

    def parse(
      cite_commands: List[String],
      text: String,
      start: Isar_Token.Pos = Isar_Token.Pos.start
    ): List[Outer] = {
      val controls = cite_commands.map(s => Symbol.control_prefix + s + Symbol.control_suffix)
      val special = (controls ::: controls.map(Symbol.decode)).toSet

      val Parsers = Antiquote.Parsers
      Parsers.parseAll(Parsers.rep(Parsers.antiquote_special(special)), text) match {
        case Parsers.Success(ants, _) =>
          var pos = start
          val result = new mutable.ListBuffer[Outer]
          for (ant <- ants) {
            ant match {
              case Antiquote.Control(source) =>
                for {
                  head <- Symbol.iterator(source).nextOption
                  kind <- Symbol.control_name(Symbol.encode(head))
                } {
                  val rest = source.substring(head.length)
                  val (body, pos1) =
                    if (rest.isEmpty) (rest, pos)
                    else (Scan.Parsers.cartouche_content(rest), pos.advance(Symbol.open))
                  result += Outer(kind, body, pos1)
                }
              case _ =>
            }
            pos = pos.advance(ant.source)
          }
          result.toList
        case _ => error("Unexpected failure parsing special antiquotations:\n" + text)
      }
    }
  }


  /* update old forms: @{cite ...} and \cite{...} */

  def cite_antiquotation(name: String, body: String): String =
    """\<^""" + name + """>\<open>""" + body + """\<close>"""

  def cite_antiquotation(name: String, location: String, citations: List[String]): String = {
    val body =
      if_proper(location, Symbol.cartouche(location) + " in ") +
      citations.map(quote).mkString(" and ")
    cite_antiquotation(name, body)
  }

  private val Cite_Command = """\\(cite|nocite|citet|citep)((?:\[[^\]]*\])?)\{([^}]*)\}""".r
  private val Cite_Macro = """\[\s*cite_macro\s*=\s*"?(\w+)"?\]\s*""".r

  def update_cite_commands(str: String): String =
    Cite_Command.replaceAllIn(str, { m =>
      val name = m.group(1)
      val loc = m.group(2)
      val location =
        if (loc.startsWith("[") && loc.endsWith("]")) loc.substring(1, loc.length - 1)
        else loc
      val citations = space_explode(',', m.group(3)).map(_.trim)
      Regex.quoteReplacement(cite_antiquotation(name, location, citations))
    })
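
  /* Rewriting sketch (hypothetical input): a legacy LaTeX citation such as
     \cite[Section 2]{Smith2020} is expected to become the control-symbol antiquotation
     \<^cite>\<open>\<open>Section 2\<close> in "Smith2020"\<close>. */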

  def update_cite_antiquotation(cite_commands: List[String], str: String): String = {
    val opt_body =
      for {
        str1 <- Library.try_unprefix("@{cite", str)
        str2 <- Library.try_unsuffix("}", str1)
      } yield str2.trim

    opt_body match {
      case None => str
      case Some(body0) =>
        val (name, body1) =
          Cite_Macro.findFirstMatchIn(body0) match {
            case None => (CITE, body0)
            case Some(m) => (m.group(1), Cite_Macro.replaceAllIn(body0, ""))
          }
        val body2 = body1.replace("""\<close>""", """\<close> in""")
        if (cite_commands.contains(name)) cite_antiquotation(name, body2)
        else cite_antiquotation(CITE, body2 + " using " + quote(name))
    }
  }
}