27 "aug", |
27 "aug", |
28 "sep", |
28 "sep", |
29 "oct", |
29 "oct", |
30 "nov", |
30 "nov", |
31 "dec") |
31 "dec") |
32 |
32 def is_month(s: String): Boolean = months.contains(s.toLowerCase) |
33 val commands = List("preamble", "string") |
33 |
|
34 private val commands = List("preamble", "string") |
|
35 def is_command(s: String): Boolean = commands.contains(s.toLowerCase) |
34 |
36 |
35 sealed case class Entry( |
37 sealed case class Entry( |
36 kind: String, |
38 kind: String, |
37 required: List[String], |
39 required: List[String], |
38 optional_crossref: List[String], |
40 optional_crossref: List[String], |
39 optional: List[String]) |
41 optional_other: List[String]) |
40 { |
42 { |
41 def fields: List[String] = required ::: optional_crossref ::: optional |
43 def is_required(s: String): Boolean = required.contains(s) |
|
44 def is_optional(s: String): Boolean = |
|
45 optional_crossref.contains(s) || optional_other.contains(s) |
|
46 |
|
47 def fields: List[String] = required ::: optional_crossref ::: optional_other |
42 def template: String = |
48 def template: String = |
43 "@" + kind + "{,\n" + fields.map(x => " " + x + " = {},\n").mkString + "}\n" |
49 "@" + kind + "{,\n" + fields.map(x => " " + x + " = {},\n").mkString + "}\n" |
44 } |
50 } |
45 |
51 |
46 val entries: List[Entry] = |
52 val entries: List[Entry] = |
99 Entry("Misc", |
105 Entry("Misc", |
100 List(), |
106 List(), |
101 List(), |
107 List(), |
102 List("author", "title", "howpublished", "month", "year", "note"))) |
108 List("author", "title", "howpublished", "month", "year", "note"))) |
103 |
109 |
|
110 def get_entry(kind: String): Option[Entry] = |
|
111 entries.find(entry => entry.kind.toLowerCase == kind.toLowerCase) |
|
112 |
104 |
113 |
105 |
114 |
106 /** tokens and chunks **/ |
115 /** tokens and chunks **/ |
107 |
116 |
108 object Token |
117 object Token |
109 { |
118 { |
110 object Kind extends Enumeration |
119 object Kind extends Enumeration |
111 { |
120 { |
|
121 val COMMAND = Value("command") |
|
122 val ENTRY = Value("entry") |
112 val KEYWORD = Value("keyword") |
123 val KEYWORD = Value("keyword") |
113 val NAT = Value("natural number") |
124 val NAT = Value("natural number") |
|
125 val STRING = Value("string") |
114 val IDENT = Value("identifier") |
126 val IDENT = Value("identifier") |
115 val STRING = Value("string") |
127 val IGNORED = Value("ignored") |
116 val SPACE = Value("white space") |
|
117 val ERROR = Value("bad input") |
128 val ERROR = Value("bad input") |
118 } |
129 } |
119 } |
130 } |
120 |
131 |
121 sealed case class Token(kind: Token.Kind.Value, val source: String) |
132 sealed case class Token(kind: Token.Kind.Value, val source: String) |
122 { |
133 { |
123 def is_space: Boolean = kind == Token.Kind.SPACE |
134 def is_ignored: Boolean = kind == Token.Kind.IGNORED |
124 def is_error: Boolean = kind == Token.Kind.ERROR |
135 def is_error: Boolean = kind == Token.Kind.ERROR |
125 } |
136 } |
126 |
137 |
127 abstract class Chunk |
138 abstract class Chunk |
128 { |
139 { |
129 def size: Int |
|
130 def kind: String |
140 def kind: String |
|
141 def tokens: List[Token] |
|
142 def source: String |
131 } |
143 } |
132 |
144 |
133 case class Ignored(source: String) extends Chunk |
145 case class Ignored(source: String) extends Chunk |
134 { |
146 { |
135 def size: Int = source.size |
|
136 def kind: String = "" |
147 def kind: String = "" |
|
148 val tokens = List(Token(Token.Kind.IGNORED, source)) |
137 } |
149 } |
138 |
150 |
139 case class Item(kind: String, tokens: List[Token]) extends Chunk |
151 case class Item(kind: String, tokens: List[Token]) extends Chunk |
140 { |
152 { |
141 def size: Int = (0 /: tokens)({ case (n, token) => n + token.source.size }) |
153 val source = tokens.map(_.source).mkString |
142 |
154 |
143 private val wellformed_content: Option[List[Token]] = |
155 private val wellformed_content: Option[List[Token]] = |
144 tokens match { |
156 tokens match { |
145 case Token(Token.Kind.KEYWORD, "@") :: body |
157 case Token(Token.Kind.KEYWORD, "@") :: body |
146 if !body.isEmpty && !body.exists(_.is_error) => |
158 if !body.isEmpty && !body.exists(_.is_error) => |
147 (body.init.filterNot(_.is_space), body.last) match { |
159 (body.init.filterNot(_.is_ignored), body.last) match { |
148 case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "{") :: toks, |
160 case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "{") :: toks, |
149 Token(Token.Kind.KEYWORD, "}")) => Some(toks) |
161 Token(Token.Kind.KEYWORD, "}")) => Some(toks) |
150 case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "(") :: toks, |
162 case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "(") :: toks, |
151 Token(Token.Kind.KEYWORD, ")")) => Some(toks) |
163 Token(Token.Kind.KEYWORD, ")")) => Some(toks) |
152 case _ => None |
164 case _ => None |
262 private val left_paren = "(" ^^ keyword |
274 private val left_paren = "(" ^^ keyword |
263 private val right_paren = ")" ^^ keyword |
275 private val right_paren = ")" ^^ keyword |
264 |
276 |
265 private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT) |
277 private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT) |
266 |
278 |
267 private val ident = |
279 private val identifier = |
268 """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r ^^ token(Token.Kind.IDENT) |
280 """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r |
|
281 |
|
282 private val ident = identifier ^^ token(Token.Kind.IDENT) |
269 |
283 |
270 val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space)) |
284 val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space)) |
271 |
285 |
272 |
286 |
273 /* items */ |
287 /* items */ |
274 |
288 |
|
289 private val special_ident = |
|
290 identifier ^^ { case a => |
|
291 val kind = |
|
292 if (is_command(a)) Token.Kind.COMMAND |
|
293 else if (get_entry(a).isDefined) Token.Kind.ENTRY |
|
294 else Token.Kind.IDENT |
|
295 Token(kind, a) |
|
296 } |
|
297 |
275 private val item_start: Parser[(String, List[Token])] = |
298 private val item_start: Parser[(String, List[Token])] = |
276 at ~ rep(strict_space) ~ ident ~ rep(strict_space) ^^ |
299 at ~ rep(strict_space) ~ special_ident ~ rep(strict_space) ^^ |
277 { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) } |
300 { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) } |
278 |
301 |
279 private val item_body = |
302 private val item_body = |
280 delimited | (recover_delimited | other_token) |
303 delimited | (recover_delimited | other_token) |
281 |
304 |
283 (item_start ~ left_brace ~ rep(item_body) ~ opt(right_brace) | |
306 (item_start ~ left_brace ~ rep(item_body) ~ opt(right_brace) | |
284 item_start ~ left_paren ~ rep(item_body) ~ opt(right_paren)) ^^ |
307 item_start ~ left_paren ~ rep(item_body) ~ opt(right_paren)) ^^ |
285 { case (kind, a) ~ b ~ c ~ d => Item(kind, a ::: List(b) ::: c ::: d.toList) } |
308 { case (kind, a) ~ b ~ c ~ d => Item(kind, a ::: List(b) ::: c ::: d.toList) } |
286 |
309 |
287 private val recover_item: Parser[Item] = |
310 private val recover_item: Parser[Item] = |
288 at ~ "(?m)[^@]+".r ^^ { case a ~ b => Item("", List(a, Token(Token.Kind.ERROR, b))) } |
311 at ~ "(?m)[^@]*".r ^^ { case a ~ b => Item("", List(a, Token(Token.Kind.ERROR, b))) } |
289 |
312 |
290 def item_line(ctxt: Line_Context): Parser[(Item, Line_Context)] = |
313 def item_line(ctxt: Line_Context): Parser[(Item, Line_Context)] = |
291 { |
314 { |
292 ctxt match { |
315 ctxt match { |
293 case Ignored_Context => |
316 case Ignored_Context => |
294 item_start ~ (left_brace | left_paren) ^^ |
317 item_start ~ (left_brace | left_paren) ^^ |
295 { case (kind, a) ~ b => |
318 { case (kind, a) ~ b => |
296 val right = if (b.source == "{") "}" else ")" |
319 val right = if (b.source == "{") "}" else ")" |
297 (Item(kind, a ::: List(b)), Item_Context(kind, Closed, right)) } |
320 (Item(kind, a ::: List(b)), Item_Context(kind, Closed, right)) } | |
|
321 recover_item ^^ { case a => (a, Ignored_Context) } |
298 case Item_Context(kind, delim, right) => |
322 case Item_Context(kind, delim, right) => |
299 if (delim.depth > 0) |
323 if (delim.depth > 0) |
300 delimited_line(ctxt) |
324 delimited_line(ctxt) |
301 else { |
325 else { |
302 delimited_line(ctxt) | |
326 delimited_line(ctxt) | |