/*  Title:      Pure/Thy/bibtex.scala
    Author:     Makarius

BibTeX support.
*/

package isabelle


import java.io.{File => JFile}

import scala.collection.mutable
import scala.util.parsing.combinator.RegexParsers
import scala.util.parsing.input.Reader
import scala.util.matching.Regex

object Bibtex {
  /** file format **/

  class File_Format extends isabelle.File_Format {
    val format_name: String = "bibtex"
    val file_ext: String = "bib"

    override def parse_data(name: String, text: String): Bibtex.Entries =
      Bibtex.Entries.parse(text, file_pos = name)

    override def theory_suffix: String = "bibtex_file"
    override def theory_content(name: String): String =
      """theory "bib" imports Pure begin bibtex_file "." end"""
    override def theory_excluded(name: String): Boolean = name == "bib"

    override def html_document(snapshot: Document.Snapshot): Option[Browser_Info.HTML_Document] = {
      val name = snapshot.node_name
      if (detect(name.node)) {
        val title = "Bibliography " + quote(snapshot.node_name.file_name)
        val content =
          Isabelle_System.with_tmp_file("bib", "bib") { bib =>
            File.write(bib, snapshot.node.source)
            Bibtex.html_output(List(bib), style = "unsort", title = title)
          }
        Some(Browser_Info.HTML_Document(title, content))
      }
      else None
    }
  }


  /** bibtex errors **/

  def bibtex_errors(dir: Path, root_name: String): List[String] = {
    val log_path = dir + Path.explode(root_name).ext("blg")
    if (log_path.is_file) {
      val Error1 = """^(I couldn't open database file .+)$""".r
      val Error2 = """^(I found no .+)$""".r
      val Error3 = """^(.+)---line (\d+) of file (.+)""".r
      Line.logical_lines(File.read(log_path)).flatMap(line =>
        line match {
          case Error1(msg) => Some("Bibtex error: " + msg)
          case Error2(msg) => Some("Bibtex error: " + msg)
          case Error3(msg, Value.Int(l), file) =>
            val path = File.standard_path(file)
            if (Path.is_wellformed(path)) {
              val pos = Position.Line_File(l, (dir + Path.explode(path)).canonical.implode)
              Some("Bibtex error" + Position.here(pos) + ":\n  " + msg)
            }
            else None
          case _ => None
        })
    }
    else Nil
  }


  /** check database **/

  def check_database(database: String): (List[(String, Position.T)], List[(String, Position.T)]) = {
    val chunks = parse(Line.normalize(database))
    var chunk_pos = Map.empty[String, Position.T]
    val tokens = new mutable.ListBuffer[(Token, Position.T)]
    var line = 1
    var offset = 1

    def make_pos(length: Int): Position.T =
      Position.Offset(offset) ::: Position.End_Offset(offset + length) ::: Position.Line(line)

    def advance_pos(tok: Token): Unit = {
      for (s <- Symbol.iterator(tok.source)) {
        if (Symbol.is_newline(s)) line += 1
        offset += 1
      }
    }

    def get_line_pos(l: Int): Position.T =
      if (0 < l && l <= tokens.length) tokens(l - 1)._2 else Position.none

    for (chunk <- chunks) {
      val name = chunk.name
      if (name != "" && !chunk_pos.isDefinedAt(name)) {
        chunk_pos += (name -> make_pos(chunk.heading_length))
      }
      for (tok <- chunk.tokens) {
        tokens += (tok.copy(source = tok.source.replace("\n", " ")) -> make_pos(tok.source.length))
        advance_pos(tok)
      }
    }

    Isabelle_System.with_tmp_dir("bibtex") { tmp_dir =>
      File.write(tmp_dir + Path.explode("root.bib"),
        tokens.iterator.map(p => p._1.source).mkString("", "\n", "\n"))
      File.write(tmp_dir + Path.explode("root.aux"),
        "\\bibstyle{plain}\n\\bibdata{root}\n\\citation{*}")
      Isabelle_System.bash("\"$ISABELLE_BIBTEX\" root", cwd = tmp_dir.file)

      val Error = """^(.*)---line (\d+) of file root.bib$""".r
      val Warning = """^Warning--(.+)$""".r
      val Warning_Line = """--line (\d+) of file root.bib$""".r
      val Warning_in_Chunk = """^Warning--(.+) in (.+)$""".r

      val log_file = tmp_dir + Path.explode("root.blg")
      val lines = if (log_file.is_file) Line.logical_lines(File.read(log_file)) else Nil

      val (errors, warnings) =
        if (lines.isEmpty) (Nil, Nil)
        else {
          lines.zip(lines.tail ::: List("")).flatMap(
            {
              case (Error(msg, Value.Int(l)), _) =>
                Some((true, (msg, get_line_pos(l))))
              case (Warning_in_Chunk(msg, name), _) if chunk_pos.isDefinedAt(name) =>
                Some((false, (Word.capitalize(msg + " in entry " + quote(name)), chunk_pos(name))))
              case (Warning(msg), Warning_Line(Value.Int(l))) =>
                Some((false, (Word.capitalize(msg), get_line_pos(l))))
              case (Warning(msg), _) =>
                Some((false, (Word.capitalize(msg), Position.none)))
              case _ => None
            }
          ).partition(_._1)
        }
      (errors.map(_._2), warnings.map(_._2))
    }
  }

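  // Illustrative sketch (hypothetical input): check_database runs the external
  // "$ISABELLE_BIBTEX" tool, so the concrete messages depend on that tool. A call like
  //   val (errors, warnings) = Bibtex.check_database("@article{foo, title = {Bar}}")
  // is expected to yield warnings such as missing "author"/"journal"/"year" fields for
  // entry "foo", each paired with a position into the given database text.
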
  object Check_Database extends Scala.Fun_String("bibtex_check_database") {
    val here = Scala_Project.here
    def apply(database: String): String = {
      import XML.Encode._
      YXML.string_of_body(pair(list(pair(string, properties)), list(pair(string, properties)))(
        check_database(database)))
    }
  }


  /** document model **/

  /* entries */

  object Entries {
    val empty: Entries = apply(Nil)

    def apply(entries: List[Text.Info[String]], errors: List[String] = Nil): Entries =
      new Entries(entries, errors)

    def parse(text: String, file_pos: String = ""): Entries = {
      val entries = new mutable.ListBuffer[Text.Info[String]]
      var offset = 0
      var line = 1
      var err_line = 0

      try {
        for (chunk <- Bibtex.parse(text)) {
          val end_offset = offset + chunk.source.length
          if (chunk.name != "" && !chunk.is_command) {
            entries += Text.Info(Text.Range(offset, end_offset), chunk.name)
          }
          if (chunk.is_malformed && err_line == 0) { err_line = line }
          offset = end_offset
          line += Library.count_newlines(chunk.source)
        }

        val err_pos =
          if (err_line == 0 || file_pos.isEmpty) Position.none
          else Position.Line_File(err_line, file_pos)
        val errors =
          if (err_line == 0) Nil
          else List("Malformed bibtex file" + Position.here(err_pos))

        apply(entries.toList, errors = errors)
      }
      catch { case ERROR(msg) => apply(Nil, errors = List(msg)) }
    }

    def iterator[A <: Document.Model](models: Iterable[A]): Iterator[Text.Info[(String, A)]] =
      for {
        model <- models.iterator
        bibtex_entries <- model.get_data(classOf[Entries]).iterator
        info <- bibtex_entries.entries.iterator
      } yield info.map((_, model))
  }

  final class Entries private(val entries: List[Text.Info[String]], val errors: List[String]) {
    override def toString: String = "Bibtex.Entries(" + entries.length + ")"

    def ::: (other: Entries): Entries =
      new Entries(entries ::: other.entries, errors ::: other.errors)
  }

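  // Illustrative sketch (hypothetical input): Entries.parse collects entry names with
  // their source ranges, e.g.
  //   Bibtex.Entries.parse("@book{key1, title = {T}}").entries.map(_.info)
  // should yield List("key1"), with an empty error list for well-formed input.
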
  /* completion */

  def completion[A <: Document.Model](
    history: Completion.History,
    rendering: Rendering,
    caret: Text.Offset,
    models: Iterable[A]
  ): Option[Completion.Result] = {
    for {
      Text.Info(r, name) <- rendering.citations(rendering.before_caret_range(caret)).headOption
      name1 <- Completion.clean_name(name)

      original <- rendering.get_text(r)
      original1 <- Completion.clean_name(Library.perhaps_unquote(original))

      entries =
        (for {
          Text.Info(_, (entry, _)) <- Entries.iterator(models)
          if entry.toLowerCase.containsSlice(name1.toLowerCase) && entry != original1
        } yield entry).toList
      if entries.nonEmpty

      items =
        entries.sorted.map({
          case entry =>
            val full_name = Long_Name.qualify(Markup.CITATION, entry)
            val description = List(entry, "(BibTeX entry)")
            val replacement = quote(entry)
            Completion.Item(r, original, full_name, description, replacement, 0, false)
        }).sorted(history.ordering).take(rendering.options.int("completion_limit"))
    } yield Completion.Result(r, original, false, items)
  }


  /** content **/

  private val months = List(
    "jan",
    "feb",
    "mar",
    "apr",
    "may",
    "jun",
    "jul",
    "aug",
    "sep",
    "oct",
    "nov",
    "dec")
  def is_month(s: String): Boolean = months.contains(s.toLowerCase)

  private val commands = List("preamble", "string")
  def is_command(s: String): Boolean = commands.contains(s.toLowerCase)

  sealed case class Entry(
    kind: String,
    required: List[String],
    optional_crossref: List[String],
    optional_other: List[String]
  ) {
    val optional_standard: List[String] = List("url", "doi", "ee")

    def is_required(s: String): Boolean = required.contains(s.toLowerCase)
    def is_optional(s: String): Boolean =
      optional_crossref.contains(s.toLowerCase) ||
      optional_other.contains(s.toLowerCase) ||
      optional_standard.contains(s.toLowerCase)

    def fields: List[String] =
      required ::: optional_crossref ::: optional_other ::: optional_standard

    def template: String =
      "@" + kind + "{,\n" + fields.map(x => "  " + x + " = {},\n").mkString + "}\n"
  }

  val known_entries: List[Entry] =
    List(
      Entry("Article",
        List("author", "title"),
        List("journal", "year"),
        List("volume", "number", "pages", "month", "note")),
      Entry("InProceedings",
        List("author", "title"),
        List("booktitle", "year"),
        List("editor", "volume", "number", "series", "pages", "month", "address",
          "organization", "publisher", "note")),
      Entry("InCollection",
        List("author", "title", "booktitle"),
        List("publisher", "year"),
        List("editor", "volume", "number", "series", "type", "chapter", "pages",
          "edition", "month", "address", "note")),
      Entry("InBook",
        List("author", "editor", "title", "chapter"),
        List("publisher", "year"),
        List("volume", "number", "series", "type", "address", "edition", "month", "pages", "note")),
      Entry("Proceedings",
        List("title", "year"),
        List(),
        List("booktitle", "editor", "volume", "number", "series", "address", "month",
          "organization", "publisher", "note")),
      Entry("Book",
        List("author", "editor", "title"),
        List("publisher", "year"),
        List("volume", "number", "series", "address", "edition", "month", "note")),
      Entry("Booklet",
        List("title"),
        List(),
        List("author", "howpublished", "address", "month", "year", "note")),
      Entry("PhdThesis",
        List("author", "title", "school", "year"),
        List(),
        List("type", "address", "month", "note")),
      Entry("MastersThesis",
        List("author", "title", "school", "year"),
        List(),
        List("type", "address", "month", "note")),
      Entry("TechReport",
        List("author", "title", "institution", "year"),
        List(),
        List("type", "number", "address", "month", "note")),
      Entry("Manual",
        List("title"),
        List(),
        List("author", "organization", "address", "edition", "month", "year", "note")),
      Entry("Unpublished",
        List("author", "title", "note"),
        List(),
        List("month", "year")),
      Entry("Misc",
        List(),
        List(),
        List("author", "title", "howpublished", "month", "year", "note")))

  def get_entry(kind: String): Option[Entry] =
    known_entries.find(entry => entry.kind.toLowerCase == kind.toLowerCase)

  def is_entry(kind: String): Boolean = get_entry(kind).isDefined

/** tokens and chunks **/ |
|
354 |
||
75393 | 355 |
object Token { |
356 |
object Kind extends Enumeration { |
|
58529 | 357 |
val COMMAND = Value("command") |
358 |
val ENTRY = Value("entry") |
|
58523 | 359 |
val KEYWORD = Value("keyword") |
360 |
val NAT = Value("natural number") |
|
58529 | 361 |
val STRING = Value("string") |
58531 | 362 |
val NAME = Value("name") |
58523 | 363 |
val IDENT = Value("identifier") |
58535 | 364 |
val SPACE = Value("white space") |
365 |
val COMMENT = Value("ignored text") |
|
58523 | 366 |
val ERROR = Value("bad input") |
367 |
} |
|
368 |
} |
|
369 |
||
75393 | 370 |
sealed case class Token(kind: Token.Kind.Value, source: String) { |
58530 | 371 |
def is_kind: Boolean = |
372 |
kind == Token.Kind.COMMAND || |
|
373 |
kind == Token.Kind.ENTRY || |
|
374 |
kind == Token.Kind.IDENT |
|
58531 | 375 |
def is_name: Boolean = |
376 |
kind == Token.Kind.NAME || |
|
377 |
kind == Token.Kind.IDENT |
|
58535 | 378 |
def is_ignored: Boolean = |
379 |
kind == Token.Kind.SPACE || |
|
380 |
kind == Token.Kind.COMMENT |
|
67276 | 381 |
def is_malformed: Boolean = |
382 |
kind == Token.Kind.ERROR |
|
383 |
def is_open: Boolean = |
|
384 |
kind == Token.Kind.KEYWORD && (source == "{" || source == "(") |
|
58523 | 385 |
} |
386 |
||
75393 | 387 |
case class Chunk(kind: String, tokens: List[Token]) { |
58529 | 388 |
val source = tokens.map(_.source).mkString |
58526 | 389 |
|
58530 | 390 |
private val content: Option[List[Token]] = |
58523 | 391 |
tokens match { |
59319 | 392 |
case Token(Token.Kind.KEYWORD, "@") :: body if body.nonEmpty => |
58529 | 393 |
(body.init.filterNot(_.is_ignored), body.last) match { |
58530 | 394 |
case (tok :: Token(Token.Kind.KEYWORD, "{") :: toks, Token(Token.Kind.KEYWORD, "}")) |
395 |
if tok.is_kind => Some(toks) |
|
396 |
||
397 |
case (tok :: Token(Token.Kind.KEYWORD, "(") :: toks, Token(Token.Kind.KEYWORD, ")")) |
|
398 |
if tok.is_kind => Some(toks) |
|
399 |
||
58528 | 400 |
case _ => None |
58523 | 401 |
} |
58528 | 402 |
case _ => None |
58526 | 403 |
} |
404 |
||
405 |
def name: String = |
|
58530 | 406 |
content match { |
58531 | 407 |
case Some(tok :: _) if tok.is_name => tok.source |
58523 | 408 |
case _ => "" |
409 |
} |
|
58530 | 410 |
|
67276 | 411 |
def heading_length: Int = |
412 |
if (name == "") 1 |
|
73359 | 413 |
else { |
414 |
tokens.takeWhile(tok => !tok.is_open).foldLeft(0) { case (n, tok) => n + tok.source.length } |
|
415 |
} |
|
67276 | 416 |
|
58530 | 417 |
def is_ignored: Boolean = kind == "" && tokens.forall(_.is_ignored) |
76779 | 418 |
def is_malformed: Boolean = tokens.exists(_.is_malformed) |
58543 | 419 |
def is_command: Boolean = Bibtex.is_command(kind) && name != "" && content.isDefined |
420 |
def is_entry: Boolean = Bibtex.is_entry(kind) && name != "" && content.isDefined |
|
58523 | 421 |
} |
422 |
||
423 |
||
424 |
||
  /** parsing **/

  // context of partial line-oriented scans
  abstract class Line_Context
  case object Ignored extends Line_Context
  case object At extends Line_Context
  case class Item_Start(kind: String) extends Line_Context
  case class Item_Open(kind: String, end: String) extends Line_Context
  case class Item(kind: String, end: String, delim: Delimited) extends Line_Context

  case class Delimited(quoted: Boolean, depth: Int)
  val Closed = Delimited(false, 0)

  private def token(kind: Token.Kind.Value)(source: String): Token = Token(kind, source)
  private def keyword(source: String): Token = Token(Token.Kind.KEYWORD, source)


  // See also https://ctan.org/tex-archive/biblio/bibtex/base/bibtex.web
  // module @<Scan for and process a \.{.bib} command or database entry@>.

  object Parsers extends RegexParsers {
    /* white space and comments */

    override val whiteSpace = "".r

    private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE)
    private val spaces = rep(space)


    /* ignored text */

    private val ignored: Parser[Chunk] =
      rep1("""(?i)([^@]+|@[ \t]*comment)""".r) ^^ {
        case ss => Chunk("", List(Token(Token.Kind.COMMENT, ss.mkString))) }

    private def ignored_line: Parser[(Chunk, Line_Context)] =
      ignored ^^ { case a => (a, Ignored) }


    /* delimited string: outermost "..." or {...} and body with balanced {...} */

    // see also bibtex.web: scan_a_field_token_and_eat_white, scan_balanced_braces
    private def delimited_depth(delim: Delimited): Parser[(String, Delimited)] =
      new Parser[(String, Delimited)] {
        require(if (delim.quoted) delim.depth > 0 else delim.depth >= 0,
          "bad delimiter depth")

        def apply(in: Input) = {
          val start = in.offset
          val end = in.source.length

          var i = start
          var q = delim.quoted
          var d = delim.depth
          var finished = false
          while (!finished && i < end) {
            val c = in.source.charAt(i)

            if (c == '"' && d == 0) { i += 1; d = 1; q = true }
            else if (c == '"' && d == 1 && q) {
              i += 1; d = 0; q = false; finished = true
            }
            else if (c == '{') { i += 1; d += 1 }
            else if (c == '}') {
              if (d == 1 && !q || d > 1) { i += 1; d -= 1; if (d == 0) finished = true }
              else { i = start; finished = true }
            }
            else if (d > 0) i += 1
            else finished = true
          }
          if (i == start) Failure("bad input", in)
          else {
            val s = in.source.subSequence(start, i).toString
            Success((s, Delimited(q, d)), in.drop(i - start))
          }
        }
      }.named("delimited_depth")

    private def delimited: Parser[Token] =
      delimited_depth(Closed) ^?
        { case (s, delim) if delim == Closed => Token(Token.Kind.STRING, s) }

    private def recover_delimited: Parser[Token] =
      """["{][^@]*""".r ^^ token(Token.Kind.ERROR)

    def delimited_line(ctxt: Item): Parser[(Chunk, Line_Context)] =
      delimited_depth(ctxt.delim) ^^ { case (s, delim1) =>
        (Chunk(ctxt.kind, List(Token(Token.Kind.STRING, s))), ctxt.copy(delim = delim1)) } |
      recover_delimited ^^ { case a => (Chunk(ctxt.kind, List(a)), Ignored) }


    /* other tokens */

    private val at = "@" ^^ keyword

    private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT)

    private val name = """[\x21-\x7f&&[^"#%'(),={}]]+""".r ^^ token(Token.Kind.NAME)

    private val identifier =
      """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r

    private val ident = identifier ^^ token(Token.Kind.IDENT)

    val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space))


    /* body */

    private val body =
      delimited | (recover_delimited | other_token)

    private def body_line(ctxt: Item) =
      if (ctxt.delim.depth > 0)
        delimited_line(ctxt)
      else
        delimited_line(ctxt) |
        other_token ^^ { case a => (Chunk(ctxt.kind, List(a)), ctxt) } |
        ctxt.end ^^ { case a => (Chunk(ctxt.kind, List(keyword(a))), Ignored) }


    /* items: command or entry */

    private val item_kind =
      identifier ^^ { case a =>
        val kind =
          if (is_command(a)) Token.Kind.COMMAND
          else if (is_entry(a)) Token.Kind.ENTRY
          else Token.Kind.IDENT
        Token(kind, a)
      }

    private val item_begin =
      "{" ^^ { case a => ("}", keyword(a)) } |
      "(" ^^ { case a => (")", keyword(a)) }

    private def item_name(kind: String) =
      kind.toLowerCase match {
        case "preamble" => failure("")
        case "string" => identifier ^^ token(Token.Kind.NAME)
        case _ => name
      }

    private val item_start =
      at ~ spaces ~ item_kind ~ spaces ^^
        { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) }

    private val item: Parser[Chunk] =
      (item_start ~ item_begin ~ spaces) into
        { case (kind, a) ~ ((end, b)) ~ c =>
            opt(item_name(kind)) ~ rep(body) ~ opt(end ^^ keyword) ^^ {
              case d ~ e ~ f => Chunk(kind, a ::: List(b) ::: c ::: d.toList ::: e ::: f.toList) } }

    private val recover_item: Parser[Chunk] =
      at ~ "[^@]*".r ^^ { case a ~ b => Chunk("", List(a, Token(Token.Kind.ERROR, b))) }


    /* chunks */

    val chunk: Parser[Chunk] = ignored | (item | recover_item)

    def chunk_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] = {
      ctxt match {
        case Ignored =>
          ignored_line |
          at ^^ { case a => (Chunk("", List(a)), At) }

        case At =>
          space ^^ { case a => (Chunk("", List(a)), ctxt) } |
          item_kind ^^ { case a => (Chunk(a.source, List(a)), Item_Start(a.source)) } |
          recover_item ^^ { case a => (a, Ignored) } |
          ignored_line

        case Item_Start(kind) =>
          space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
          item_begin ^^ { case (end, a) => (Chunk(kind, List(a)), Item_Open(kind, end)) } |
          recover_item ^^ { case a => (a, Ignored) } |
          ignored_line

        case Item_Open(kind, end) =>
          space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
          item_name(kind) ^^ { case a => (Chunk(kind, List(a)), Item(kind, end, Closed)) } |
          body_line(Item(kind, end, Closed)) |
          ignored_line

        case item_ctxt: Item =>
          body_line(item_ctxt) |
          ignored_line

        case _ => failure("")
      }
    }
  }

  /* parse */

  def parse(input: CharSequence): List[Chunk] =
    Parsers.parseAll(Parsers.rep(Parsers.chunk), Scan.char_reader(input)) match {
      case Parsers.Success(result, _) => result
      case _ => error("Unexpected failure to parse input:\n" + input.toString)
    }

  def parse_line(input: CharSequence, context: Line_Context): (List[Chunk], Line_Context) = {
    var in: Reader[Char] = Scan.char_reader(input)
    val chunks = new mutable.ListBuffer[Chunk]
    var ctxt = context
    while (!in.atEnd) {
      val result = Parsers.parse(Parsers.chunk_line(ctxt), in)
      (result : @unchecked) match {
        case Parsers.Success((x, c), rest) => chunks += x; ctxt = c; in = rest
        case Parsers.NoSuccess(_, rest) =>
          error("Unexpected failure to parse input:\n" + rest.source.toString)
      }
    }
    (chunks.toList, ctxt)
  }

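  // Illustrative sketch (hypothetical input): a minimal database parses into one
  // entry chunk plus an ignored chunk for trailing text, e.g.
  //   Bibtex.parse("@misc{key, note = {x}}\n").map(c => (c.kind, c.name))
  // should yield List(("misc", "key"), ("", "")), the final pair being the chunk
  // for the trailing newline.
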

  /** HTML output **/

  private val output_styles =
    List(
      "" -> "html-n",
      "plain" -> "html-n",
      "alpha" -> "html-a",
      "named" -> "html-n",
      "paragraph" -> "html-n",
      "unsort" -> "html-u",
      "unsortlist" -> "html-u")

  def html_output(bib: List[Path],
    title: String = "Bibliography",
    body: Boolean = false,
    citations: List[String] = List("*"),
    style: String = "",
    chronological: Boolean = false
  ): String = {
    Isabelle_System.with_tmp_dir("bibtex") { tmp_dir =>
      /* database files */

      val bib_files = bib.map(_.drop_ext)
      val bib_names = {
        val names0 = bib_files.map(_.file_name)
        if (Library.duplicates(names0).isEmpty) names0
        else names0.zipWithIndex.map({ case (name, i) => (i + 1).toString + "-" + name })
      }

      for ((a, b) <- bib_files zip bib_names) {
        Isabelle_System.copy_file(a.ext("bib"), tmp_dir + Path.basic(b).ext("bib"))
      }


      /* style file */

      val bst =
        output_styles.toMap.get(style) match {
          case Some(base) => base + (if (chronological) "c" else "") + ".bst"
          case None =>
            error("Bad style for bibtex HTML output: " + quote(style) +
              "\n(expected: " + commas_quote(output_styles.map(_._1)) + ")")
        }
      Isabelle_System.copy_file(Path.explode("$BIB2XHTML_HOME/bst") + Path.explode(bst), tmp_dir)


      /* result */

      val in_file = Path.explode("bib.aux")
      val out_file = Path.explode("bib.html")

      File.write(tmp_dir + in_file,
        bib_names.mkString("\\bibdata{", ",", "}\n") +
        citations.map(cite => "\\citation{" + cite + "}\n").mkString)

      Isabelle_System.bash(
        "\"$BIB2XHTML_HOME/main/bib2xhtml.pl\" -B \"$ISABELLE_BIBTEX\"" +
          " -u -s " + Bash.string(proper_string(style) getOrElse "empty") +
          (if (chronological) " -c" else "") +
          (if (title != "") " -h " + Bash.string(title) + " " else "") +
          " " + File.bash_path(in_file) + " " + File.bash_path(out_file),
        cwd = tmp_dir.file).check

      val html = File.read(tmp_dir + out_file)

      if (body) {
        cat_lines(
          split_lines(html).
            dropWhile(line => !line.startsWith("<!-- BEGIN BIBLIOGRAPHY")).reverse.
            dropWhile(line => !line.startsWith("<!-- END BIBLIOGRAPHY")).reverse)
      }
      else html
    }
  }


  /** cite commands and antiquotations **/

  def cite_antiquotation(name: String, body: String): String =
    """\<^""" + name + """>\<open>""" + body + """\<close>"""

  def cite_antiquotation(name: String, location: String, citations: List[String]): String = {
    val body =
      (if (location.isEmpty) "" else Symbol.cartouche(location) + " in ") +
      citations.map(quote).mkString(" and ")
    cite_antiquotation(name, body)
  }

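  // Illustrative sketch: without a location the generated antiquotation is just the
  // quoted citation names, e.g.
  //   cite_antiquotation("nocite", "", List("foo", "bar"))
  // should produce the Isabelle source text \<^nocite>\<open>"foo" and "bar"\<close>.
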
  private val Cite_Command = """\\(cite|nocite|citet|citep)((?:\[[^\]]*\])?)\{([^}]*)\}""".r
  private val Cite_Macro = """\[\s*cite_macro\s*=\s*"?(\w+)"?\]\s*""".r

  private val CITE = "cite"

  def update_cite_commands(str: String): String =
    Cite_Command.replaceAllIn(str, { m =>
      val name = m.group(1)
      val loc = m.group(2)
      val location =
        if (loc.startsWith("[") && loc.endsWith("]")) loc.substring(1, loc.length - 1)
        else loc
      val citations = Library.space_explode(',', m.group(3)).map(_.trim)
      Regex.quoteReplacement(cite_antiquotation(name, location, citations))
    })

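  // Illustrative sketch: rewriting old LaTeX-style citations into antiquotations, e.g.
  //   update_cite_commands("""see \citet{foo,bar}""")
  // should turn the \citet command into \<^citet>\<open>"foo" and "bar"\<close>,
  // leaving the surrounding text unchanged.
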
  def update_cite_antiquotation(cite_commands: List[String], str: String): String = {
    val opt_body =
      for {
        str1 <- Library.try_unprefix("@{cite", str)
        str2 <- Library.try_unsuffix("}", str1)
      } yield str2.trim

    opt_body match {
      case None => str
      case Some(body0) =>
        val (name, body1) =
          Cite_Macro.findFirstMatchIn(body0) match {
            case None => (CITE, body0)
            case Some(m) => (m.group(1), Cite_Macro.replaceAllIn(body0, ""))
          }
        val body2 = body1.replace("""\<close>""", """\<close> in""")
        if (cite_commands.contains(name)) cite_antiquotation(name, body2)
        else cite_antiquotation(CITE, body2 + " using " + quote(name))
    }
  }
}