author | wenzelm |
Mon, 06 Oct 2014 11:44:16 +0200 | |
changeset 58590 | 472b9fbcc7f0 |
parent 58589 | d9350ec0937e |
child 58591 | 3c1a8c1c6b3b |
permissions | -rw-r--r-- |
58523 | 1 |
/* Title: Pure/Tools/bibtex.scala |
2 |
Author: Makarius |
|
3 |
||
58544
340f130b3d38
bibtex support in ML: document antiquotation @{cite} with markup;
wenzelm
parents:
58543
diff
changeset
|
4 |
BibTeX support. |
58523 | 5 |
*/ |
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
58528 | 10 |
import scala.collection.mutable |
58523 | 11 |
import scala.util.parsing.input.{Reader, CharSequenceReader} |
12 |
import scala.util.parsing.combinator.RegexParsers |
|
13 |
||
14 |
||
15 |
object Bibtex |
|
16 |
{ |
|
17 |
/** content **/ |
|
18 |
||
58529 | 19 |
private val months = List( |
58523 | 20 |
"jan", |
21 |
"feb", |
|
22 |
"mar", |
|
23 |
"apr", |
|
24 |
"may", |
|
25 |
"jun", |
|
26 |
"jul", |
|
27 |
"aug", |
|
28 |
"sep", |
|
29 |
"oct", |
|
30 |
"nov", |
|
31 |
"dec") |
|
58529 | 32 |
def is_month(s: String): Boolean = months.contains(s.toLowerCase) |
58523 | 33 |
|
58529 | 34 |
private val commands = List("preamble", "string") |
35 |
def is_command(s: String): Boolean = commands.contains(s.toLowerCase) |
|
58523 | 36 |
|
58524 | 37 |
sealed case class Entry( |
58526 | 38 |
kind: String, |
58523 | 39 |
required: List[String], |
40 |
optional_crossref: List[String], |
|
58529 | 41 |
optional_other: List[String]) |
58524 | 42 |
{ |
58533 | 43 |
def is_required(s: String): Boolean = required.contains(s.toLowerCase) |
58529 | 44 |
def is_optional(s: String): Boolean = |
58533 | 45 |
optional_crossref.contains(s.toLowerCase) || optional_other.contains(s.toLowerCase) |
58529 | 46 |
|
47 |
def fields: List[String] = required ::: optional_crossref ::: optional_other |
|
58524 | 48 |
def template: String = |
58526 | 49 |
"@" + kind + "{,\n" + fields.map(x => " " + x + " = {},\n").mkString + "}\n" |
58524 | 50 |
} |
58523 | 51 |
|
58524 | 52 |
val entries: List[Entry] = |
53 |
List( |
|
54 |
Entry("Article", |
|
55 |
List("author", "title"), |
|
56 |
List("journal", "year"), |
|
57 |
List("volume", "number", "pages", "month", "note")), |
|
58 |
Entry("InProceedings", |
|
59 |
List("author", "title"), |
|
60 |
List("booktitle", "year"), |
|
61 |
List("editor", "volume", "number", "series", "pages", "month", "address", |
|
62 |
"organization", "publisher", "note")), |
|
63 |
Entry("InCollection", |
|
64 |
List("author", "title", "booktitle"), |
|
65 |
List("publisher", "year"), |
|
66 |
List("editor", "volume", "number", "series", "type", "chapter", "pages", |
|
67 |
"edition", "month", "address", "note")), |
|
68 |
Entry("InBook", |
|
69 |
List("author", "editor", "title", "chapter"), |
|
70 |
List("publisher", "year"), |
|
71 |
List("volume", "number", "series", "type", "address", "edition", "month", "pages", "note")), |
|
72 |
Entry("Proceedings", |
|
73 |
List("title", "year"), |
|
74 |
List(), |
|
75 |
List("booktitle", "editor", "volume", "number", "series", "address", "month", |
|
76 |
"organization", "publisher", "note")), |
|
77 |
Entry("Book", |
|
78 |
List("author", "editor", "title"), |
|
79 |
List("publisher", "year"), |
|
80 |
List("volume", "number", "series", "address", "edition", "month", "note")), |
|
81 |
Entry("Booklet", |
|
82 |
List("title"), |
|
83 |
List(), |
|
84 |
List("author", "howpublished", "address", "month", "year", "note")), |
|
85 |
Entry("PhdThesis", |
|
86 |
List("author", "title", "school", "year"), |
|
87 |
List(), |
|
88 |
List("type", "address", "month", "note")), |
|
89 |
Entry("MastersThesis", |
|
90 |
List("author", "title", "school", "year"), |
|
91 |
List(), |
|
92 |
List("type", "address", "month", "note")), |
|
93 |
Entry("TechReport", |
|
94 |
List("author", "title", "institution", "year"), |
|
95 |
List(), |
|
96 |
List("type", "number", "address", "month", "note")), |
|
97 |
Entry("Manual", |
|
98 |
List("title"), |
|
99 |
List(), |
|
100 |
List("author", "organization", "address", "edition", "month", "year", "note")), |
|
101 |
Entry("Unpublished", |
|
102 |
List("author", "title", "note"), |
|
103 |
List(), |
|
104 |
List("month", "year")), |
|
105 |
Entry("Misc", |
|
106 |
List(), |
|
107 |
List(), |
|
108 |
List("author", "title", "howpublished", "month", "year", "note"))) |
|
58523 | 109 |
|
58529 | 110 |
def get_entry(kind: String): Option[Entry] = |
111 |
entries.find(entry => entry.kind.toLowerCase == kind.toLowerCase) |
|
112 |
||
58530 | 113 |
def is_entry(kind: String): Boolean = get_entry(kind).isDefined |
114 |
||
58523 | 115 |
|
116 |
||
117 |
/** tokens and chunks **/ |
|
118 |
||
119 |
object Token |
|
120 |
{ |
|
121 |
object Kind extends Enumeration |
|
122 |
{ |
|
58529 | 123 |
val COMMAND = Value("command") |
124 |
val ENTRY = Value("entry") |
|
58523 | 125 |
val KEYWORD = Value("keyword") |
126 |
val NAT = Value("natural number") |
|
58529 | 127 |
val STRING = Value("string") |
58531 | 128 |
val NAME = Value("name") |
58523 | 129 |
val IDENT = Value("identifier") |
58535 | 130 |
val SPACE = Value("white space") |
131 |
val COMMENT = Value("ignored text") |
|
58523 | 132 |
val ERROR = Value("bad input") |
133 |
} |
|
134 |
} |
|
135 |
||
136 |
sealed case class Token(kind: Token.Kind.Value, val source: String) |
|
137 |
{ |
|
58530 | 138 |
def is_kind: Boolean = |
139 |
kind == Token.Kind.COMMAND || |
|
140 |
kind == Token.Kind.ENTRY || |
|
141 |
kind == Token.Kind.IDENT |
|
58531 | 142 |
def is_name: Boolean = |
143 |
kind == Token.Kind.NAME || |
|
144 |
kind == Token.Kind.IDENT |
|
58535 | 145 |
def is_ignored: Boolean = |
146 |
kind == Token.Kind.SPACE || |
|
147 |
kind == Token.Kind.COMMENT |
|
148 |
def is_malformed: Boolean = kind == |
|
149 |
Token.Kind.ERROR |
|
58523 | 150 |
} |
151 |
||
58530 | 152 |
case class Chunk(kind: String, tokens: List[Token]) |
58523 | 153 |
{ |
58529 | 154 |
val source = tokens.map(_.source).mkString |
58526 | 155 |
|
58530 | 156 |
private val content: Option[List[Token]] = |
58523 | 157 |
tokens match { |
58530 | 158 |
case Token(Token.Kind.KEYWORD, "@") :: body if !body.isEmpty => |
58529 | 159 |
(body.init.filterNot(_.is_ignored), body.last) match { |
58530 | 160 |
case (tok :: Token(Token.Kind.KEYWORD, "{") :: toks, Token(Token.Kind.KEYWORD, "}")) |
161 |
if tok.is_kind => Some(toks) |
|
162 |
||
163 |
case (tok :: Token(Token.Kind.KEYWORD, "(") :: toks, Token(Token.Kind.KEYWORD, ")")) |
|
164 |
if tok.is_kind => Some(toks) |
|
165 |
||
58528 | 166 |
case _ => None |
58523 | 167 |
} |
58528 | 168 |
case _ => None |
58526 | 169 |
} |
170 |
||
171 |
def name: String = |
|
58530 | 172 |
content match { |
58531 | 173 |
case Some(tok :: _) if tok.is_name => tok.source |
58523 | 174 |
case _ => "" |
175 |
} |
|
58530 | 176 |
|
177 |
def is_ignored: Boolean = kind == "" && tokens.forall(_.is_ignored) |
|
178 |
def is_malformed: Boolean = kind == "" || tokens.exists(_.is_malformed) |
|
58543 | 179 |
def is_command: Boolean = Bibtex.is_command(kind) && name != "" && content.isDefined |
180 |
def is_entry: Boolean = Bibtex.is_entry(kind) && name != "" && content.isDefined |
|
58523 | 181 |
} |
182 |
||
183 |
||
184 |
||
185 |
/** parsing **/ |
|
186 |
||
187 |
// context of partial line-oriented scans |
|
188 |
abstract class Line_Context |
|
58589 | 189 |
case object Ignored extends Line_Context |
58590
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
190 |
case object At extends Line_Context |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
191 |
case class Item_Start(kind: String) extends Line_Context |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
192 |
case class Item_Open(kind: String, right: String) extends Line_Context |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
193 |
case class Item(kind: String, right: String, delim: Delimited) extends Line_Context |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
194 |
|
58528 | 195 |
case class Delimited(quoted: Boolean, depth: Int) |
196 |
val Closed = Delimited(false, 0) |
|
58523 | 197 |
|
198 |
private def token(kind: Token.Kind.Value)(source: String): Token = Token(kind, source) |
|
199 |
private def keyword(source: String): Token = Token(Token.Kind.KEYWORD, source) |
|
200 |
||
201 |
||
202 |
// See also http://ctan.org/tex-archive/biblio/bibtex/base/bibtex.web |
|
203 |
// module @<Scan for and process a \.{.bib} command or database entry@>. |
|
204 |
||
205 |
object Parsers extends RegexParsers |
|
206 |
{ |
|
207 |
/* white space and comments */ |
|
208 |
||
209 |
override val whiteSpace = "".r |
|
210 |
||
58535 | 211 |
private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE) |
58590
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
212 |
private val spaces = rep(space) |
58523 | 213 |
|
58528 | 214 |
|
58535 | 215 |
/* ignored text */ |
58528 | 216 |
|
217 |
private val ignored: Parser[Chunk] = |
|
58530 | 218 |
rep1("""(?mi)([^@]+|@[ \t]*comment)""".r) ^^ { |
58535 | 219 |
case ss => Chunk("", List(Token(Token.Kind.COMMENT, ss.mkString))) } |
58523 | 220 |
|
58536
402a8e8107a7
more total chunk_line: recovery via ignored_line;
wenzelm
parents:
58535
diff
changeset
|
221 |
private def ignored_line: Parser[(Chunk, Line_Context)] = |
58589 | 222 |
ignored ^^ { case a => (a, Ignored) } |
58536
402a8e8107a7
more total chunk_line: recovery via ignored_line;
wenzelm
parents:
58535
diff
changeset
|
223 |
|
58523 | 224 |
|
225 |
/* delimited string: outermost "..." or {...} and body with balanced {...} */ |
|
226 |
||
58534 | 227 |
// see also bibtex.web: scan_a_field_token_and_eat_white, scan_balanced_braces |
58523 | 228 |
private def delimited_depth(delim: Delimited): Parser[(String, Delimited)] = |
229 |
new Parser[(String, Delimited)] |
|
230 |
{ |
|
231 |
require(if (delim.quoted) delim.depth > 0 else delim.depth >= 0) |
|
232 |
||
233 |
def apply(in: Input) = |
|
234 |
{ |
|
235 |
val start = in.offset |
|
236 |
val end = in.source.length |
|
237 |
||
238 |
var i = start |
|
239 |
var q = delim.quoted |
|
240 |
var d = delim.depth |
|
241 |
var finished = false |
|
242 |
while (!finished && i < end) { |
|
243 |
val c = in.source.charAt(i) |
|
58534 | 244 |
|
58523 | 245 |
if (c == '"' && d == 0) { i += 1; d = 1; q = true } |
58532 | 246 |
else if (c == '"' && d == 1 && q) { |
247 |
i += 1; d = 0; q = false; finished = true |
|
248 |
} |
|
58523 | 249 |
else if (c == '{') { i += 1; d += 1 } |
58534 | 250 |
else if (c == '}') { |
251 |
if (d == 1 && !q || d > 1) { i += 1; d -= 1; if (d == 0) finished = true } |
|
252 |
else {i = start; finished = true } |
|
58532 | 253 |
} |
58523 | 254 |
else if (d > 0) i += 1 |
255 |
else finished = true |
|
256 |
} |
|
257 |
if (i == start) Failure("bad input", in) |
|
58528 | 258 |
else { |
259 |
val s = in.source.subSequence(start, i).toString |
|
260 |
Success((s, Delimited(q, d)), in.drop(i - start)) |
|
261 |
} |
|
58523 | 262 |
} |
263 |
}.named("delimited_depth") |
|
264 |
||
58528 | 265 |
private def delimited: Parser[Token] = |
266 |
delimited_depth(Closed) ^? |
|
267 |
{ case (s, delim) if delim == Closed => Token(Token.Kind.STRING, s) } |
|
58523 | 268 |
|
58534 | 269 |
private def recover_delimited: Parser[Token] = |
270 |
"""(?m)["{][^@]*""".r ^^ token(Token.Kind.ERROR) |
|
271 |
||
58590
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
272 |
def delimited_line(ctxt: Item): Parser[(Chunk, Line_Context)] = |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
273 |
delimited_depth(ctxt.delim) ^^ { case (s, delim1) => |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
274 |
(Chunk(ctxt.kind, List(Token(Token.Kind.STRING, s))), ctxt.copy(delim = delim1)) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
275 |
recover_delimited ^^ { case a => (Chunk(ctxt.kind, List(a)), Ignored) } |
58523 | 276 |
|
277 |
||
278 |
/* other tokens */ |
|
279 |
||
280 |
private val at = "@" ^^ keyword |
|
281 |
private val left_brace = "{" ^^ keyword |
|
282 |
private val right_brace = "}" ^^ keyword |
|
283 |
private val left_paren = "(" ^^ keyword |
|
284 |
private val right_paren = ")" ^^ keyword |
|
285 |
||
286 |
private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT) |
|
287 |
||
58529 | 288 |
private val identifier = |
289 |
"""[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r |
|
290 |
||
291 |
private val ident = identifier ^^ token(Token.Kind.IDENT) |
|
58523 | 292 |
|
58528 | 293 |
val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space)) |
294 |
||
295 |
||
58530 | 296 |
/* items: command or entry */ |
58528 | 297 |
|
58530 | 298 |
private val item_kind = |
58529 | 299 |
identifier ^^ { case a => |
300 |
val kind = |
|
301 |
if (is_command(a)) Token.Kind.COMMAND |
|
58530 | 302 |
else if (is_entry(a)) Token.Kind.ENTRY |
58529 | 303 |
else Token.Kind.IDENT |
304 |
Token(kind, a) |
|
305 |
} |
|
306 |
||
58531 | 307 |
private val item_start = |
58590
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
308 |
at ~ spaces ~ item_kind ~ spaces ^^ |
58528 | 309 |
{ case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) } |
310 |
||
58590
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
311 |
private val item_name = identifier ^^ token(Token.Kind.NAME) |
58531 | 312 |
|
58528 | 313 |
private val item_body = |
314 |
delimited | (recover_delimited | other_token) |
|
315 |
||
58530 | 316 |
private val item: Parser[Chunk] = |
58590
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
317 |
(item_start ~ left_brace ~ spaces ~ item_name ~ rep(item_body) ~ opt(right_brace) | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
318 |
item_start ~ left_paren ~ spaces ~ item_name ~ rep(item_body) ~ opt(right_paren)) ^^ |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
319 |
{ case (kind, a) ~ b ~ c ~ d ~ e ~ f => |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
320 |
Chunk(kind, a ::: List(b) ::: c ::: List(d) ::: e ::: f.toList) } |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
321 |
|
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
322 |
|
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
323 |
/* chunks */ |
58528 | 324 |
|
58530 | 325 |
private val recover_item: Parser[Chunk] = |
326 |
at ~ "(?m)[^@]*".r ^^ { case a ~ b => Chunk("", List(a, Token(Token.Kind.ERROR, b))) } |
|
58528 | 327 |
|
58590
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
328 |
private def item_body_line(ctxt: Item) = |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
329 |
if (ctxt.delim.depth > 0) |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
330 |
delimited_line(ctxt) |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
331 |
else |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
332 |
delimited_line(ctxt) | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
333 |
other_token ^^ { case a => (Chunk(ctxt.kind, List(a)), ctxt) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
334 |
ctxt.right ^^ { case a => (Chunk(ctxt.kind, List(keyword(a))), Ignored) } |
58523 | 335 |
|
58528 | 336 |
val chunk: Parser[Chunk] = ignored | (item | recover_item) |
58523 | 337 |
|
58528 | 338 |
def chunk_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] = |
58530 | 339 |
{ |
340 |
ctxt match { |
|
58589 | 341 |
case Ignored => |
58538
299b82d12d53
proper treatment of @comment (amending 402a8e8107a7);
wenzelm
parents:
58536
diff
changeset
|
342 |
ignored_line | |
58590
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
343 |
at ^^ { case a => (Chunk("", List(a)), At) } |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
344 |
|
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
345 |
case At => |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
346 |
space ^^ { case a => (Chunk("", List(a)), ctxt) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
347 |
item_kind ^^ { case a => (Chunk(a.source, List(a)), Item_Start(a.source)) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
348 |
recover_item ^^ { case a => (a, Ignored) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
349 |
ignored_line |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
350 |
|
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
351 |
case Item_Start(kind) => |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
352 |
space ^^ { case a => (Chunk(kind, List(a)), ctxt) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
353 |
left_brace ^^ { case a => (Chunk(kind, List(a)), Item_Open(kind, "}")) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
354 |
left_paren ^^ { case a => (Chunk(kind, List(a)), Item_Open(kind, ")")) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
355 |
ignored_line |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
356 |
|
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
357 |
case Item_Open(kind, right) => |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
358 |
space ^^ { case a => (Chunk(kind, List(a)), ctxt) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
359 |
item_name ^^ { case a => (Chunk(kind, List(a)), Item(kind, right, Closed)) } | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
360 |
item_body_line(Item(kind, right, Closed)) | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
361 |
ignored_line |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
362 |
|
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
363 |
case item_ctxt: Item => |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
364 |
item_body_line(item_ctxt) | |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
365 |
ignored_line |
472b9fbcc7f0
back to liberal spaces of official syntax (in contrast to 4b190c763097), e.g. relevant for easychair.bib;
wenzelm
parents:
58589
diff
changeset
|
366 |
|
58530 | 367 |
case _ => failure("") |
368 |
} |
|
369 |
} |
|
58528 | 370 |
} |
58523 | 371 |
|
372 |
||
58528 | 373 |
/* parse */ |
58523 | 374 |
|
375 |
def parse(input: CharSequence): List[Chunk] = |
|
376 |
{ |
|
377 |
val in: Reader[Char] = new CharSequenceReader(input) |
|
58528 | 378 |
Parsers.parseAll(Parsers.rep(Parsers.chunk), in) match { |
58523 | 379 |
case Parsers.Success(result, _) => result |
58526 | 380 |
case _ => error("Unexpected failure to parse input:\n" + input.toString) |
58523 | 381 |
} |
382 |
} |
|
58528 | 383 |
|
384 |
def parse_line(input: CharSequence, context: Line_Context): (List[Chunk], Line_Context) = |
|
385 |
{ |
|
386 |
var in: Reader[Char] = new CharSequenceReader(input) |
|
387 |
val chunks = new mutable.ListBuffer[Chunk] |
|
388 |
var ctxt = context |
|
389 |
while (!in.atEnd) { |
|
390 |
Parsers.parse(Parsers.chunk_line(ctxt), in) match { |
|
391 |
case Parsers.Success((x, c), rest) => { chunks += x; ctxt = c; in = rest } |
|
392 |
case Parsers.NoSuccess(_, rest) => |
|
393 |
error("Unepected failure to parse input:\n" + rest.source.toString) |
|
394 |
} |
|
395 |
} |
|
396 |
(chunks.toList, ctxt) |
|
397 |
} |
|
58523 | 398 |
} |
399 |