src/Pure/Tools/bibtex.scala
changeset 58529 cd4439d8799c
parent 58528 7d6b8f8893e8
child 58530 7ee248f19ca9
equal deleted inserted replaced
58528:7d6b8f8893e8 58529:cd4439d8799c
    14 
    14 
    15 object Bibtex
    15 object Bibtex
    16 {
    16 {
    17   /** content **/
    17   /** content **/
    18 
    18 
    19   val months = List(
    19   private val months = List(
    20     "jan",
    20     "jan",
    21     "feb",
    21     "feb",
    22     "mar",
    22     "mar",
    23     "apr",
    23     "apr",
    24     "may",
    24     "may",
    27     "aug",
    27     "aug",
    28     "sep",
    28     "sep",
    29     "oct",
    29     "oct",
    30     "nov",
    30     "nov",
    31     "dec")
    31     "dec")
    32 
    32   def is_month(s: String): Boolean = months.contains(s.toLowerCase)
    33   val commands = List("preamble", "string")
    33 
       
    34   private val commands = List("preamble", "string")
       
    35   def is_command(s: String): Boolean = commands.contains(s.toLowerCase)
    34 
    36 
    35   sealed case class Entry(
    37   sealed case class Entry(
    36     kind: String,
    38     kind: String,
    37     required: List[String],
    39     required: List[String],
    38     optional_crossref: List[String],
    40     optional_crossref: List[String],
    39     optional: List[String])
    41     optional_other: List[String])
    40   {
    42   {
    41     def fields: List[String] = required ::: optional_crossref ::: optional
    43     def is_required(s: String): Boolean = required.contains(s)
       
    44     def is_optional(s: String): Boolean =
       
    45       optional_crossref.contains(s) || optional_other.contains(s)
       
    46 
       
    47     def fields: List[String] = required ::: optional_crossref ::: optional_other
    42     def template: String =
    48     def template: String =
    43       "@" + kind + "{,\n" + fields.map(x => "  " + x + " = {},\n").mkString + "}\n"
    49       "@" + kind + "{,\n" + fields.map(x => "  " + x + " = {},\n").mkString + "}\n"
    44   }
    50   }
    45 
    51 
    46   val entries: List[Entry] =
    52   val entries: List[Entry] =
    99       Entry("Misc",
   105       Entry("Misc",
   100         List(),
   106         List(),
   101         List(),
   107         List(),
   102         List("author", "title", "howpublished", "month", "year", "note")))
   108         List("author", "title", "howpublished", "month", "year", "note")))
   103 
   109 
       
   110   def get_entry(kind: String): Option[Entry] =
       
   111     entries.find(entry => entry.kind.toLowerCase == kind.toLowerCase)
       
   112 
   104 
   113 
   105 
   114 
   106   /** tokens and chunks **/
   115   /** tokens and chunks **/
   107 
   116 
   108   object Token
   117   object Token
   109   {
   118   {
   110     object Kind extends Enumeration
   119     object Kind extends Enumeration
   111     {
   120     {
       
   121       val COMMAND = Value("command")
       
   122       val ENTRY = Value("entry")
   112       val KEYWORD = Value("keyword")
   123       val KEYWORD = Value("keyword")
   113       val NAT = Value("natural number")
   124       val NAT = Value("natural number")
       
   125       val STRING = Value("string")
   114       val IDENT = Value("identifier")
   126       val IDENT = Value("identifier")
   115       val STRING = Value("string")
   127       val IGNORED = Value("ignored")
   116       val SPACE = Value("white space")
       
   117       val ERROR = Value("bad input")
   128       val ERROR = Value("bad input")
   118     }
   129     }
   119   }
   130   }
   120 
   131 
   121   sealed case class Token(kind: Token.Kind.Value, val source: String)
   132   sealed case class Token(kind: Token.Kind.Value, val source: String)
   122   {
   133   {
   123     def is_space: Boolean = kind == Token.Kind.SPACE
   134     def is_ignored: Boolean = kind == Token.Kind.IGNORED
   124     def is_error: Boolean = kind == Token.Kind.ERROR
   135     def is_error: Boolean = kind == Token.Kind.ERROR
   125   }
   136   }
   126 
   137 
   127   abstract class Chunk
   138   abstract class Chunk
   128   {
   139   {
   129     def size: Int
       
   130     def kind: String
   140     def kind: String
       
   141     def tokens: List[Token]
       
   142     def source: String
   131   }
   143   }
   132 
   144 
   133   case class Ignored(source: String) extends Chunk
   145   case class Ignored(source: String) extends Chunk
   134   {
   146   {
   135     def size: Int = source.size
       
   136     def kind: String = ""
   147     def kind: String = ""
       
   148     val tokens = List(Token(Token.Kind.IGNORED, source))
   137   }
   149   }
   138 
   150 
   139   case class Item(kind: String, tokens: List[Token]) extends Chunk
   151   case class Item(kind: String, tokens: List[Token]) extends Chunk
   140   {
   152   {
   141     def size: Int = (0 /: tokens)({ case (n, token) => n + token.source.size })
   153     val source = tokens.map(_.source).mkString
   142 
   154 
   143     private val wellformed_content: Option[List[Token]] =
   155     private val wellformed_content: Option[List[Token]] =
   144       tokens match {
   156       tokens match {
   145         case Token(Token.Kind.KEYWORD, "@") :: body
   157         case Token(Token.Kind.KEYWORD, "@") :: body
   146         if !body.isEmpty && !body.exists(_.is_error) =>
   158         if !body.isEmpty && !body.exists(_.is_error) =>
   147           (body.init.filterNot(_.is_space), body.last) match {
   159           (body.init.filterNot(_.is_ignored), body.last) match {
   148             case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "{") :: toks,
   160             case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "{") :: toks,
   149                   Token(Token.Kind.KEYWORD, "}")) => Some(toks)
   161                   Token(Token.Kind.KEYWORD, "}")) => Some(toks)
   150             case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "(") :: toks,
   162             case (Token(Token.Kind.IDENT, _) :: Token(Token.Kind.KEYWORD, "(") :: toks,
   151                   Token(Token.Kind.KEYWORD, ")")) => Some(toks)
   163                   Token(Token.Kind.KEYWORD, ")")) => Some(toks)
   152             case _ => None
   164             case _ => None
   185   {
   197   {
   186     /* white space and comments */
   198     /* white space and comments */
   187 
   199 
   188     override val whiteSpace = "".r
   200     override val whiteSpace = "".r
   189 
   201 
   190     private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.SPACE)
   202     private val space = """[ \t\n\r]+""".r ^^ token(Token.Kind.IGNORED)
   191     private val strict_space = """[ \t]+""".r ^^ token(Token.Kind.SPACE)
   203     private val strict_space = """[ \t]+""".r ^^ token(Token.Kind.IGNORED)
   192 
   204 
   193 
   205 
   194     /* ignored material outside items */
   206     /* ignored material outside items */
   195 
   207 
   196     private val ignored: Parser[Chunk] =
   208     private val ignored: Parser[Chunk] =
   262     private val left_paren = "(" ^^ keyword
   274     private val left_paren = "(" ^^ keyword
   263     private val right_paren = ")" ^^ keyword
   275     private val right_paren = ")" ^^ keyword
   264 
   276 
   265     private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT)
   277     private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT)
   266 
   278 
   267     private val ident =
   279     private val identifier =
   268       """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r ^^ token(Token.Kind.IDENT)
   280       """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r
       
   281 
       
   282     private val ident = identifier ^^ token(Token.Kind.IDENT)
   269 
   283 
   270     val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space))
   284     val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space))
   271 
   285 
   272 
   286 
   273     /* items */
   287     /* items */
   274 
   288 
       
   289     private val special_ident =
       
   290       identifier ^^ { case a =>
       
   291         val kind =
       
   292           if (is_command(a)) Token.Kind.COMMAND
       
   293           else if (get_entry(a).isDefined) Token.Kind.ENTRY
       
   294           else Token.Kind.IDENT
       
   295         Token(kind, a)
       
   296       }
       
   297 
   275     private val item_start: Parser[(String, List[Token])] =
   298     private val item_start: Parser[(String, List[Token])] =
   276       at ~ rep(strict_space) ~ ident ~ rep(strict_space) ^^
   299       at ~ rep(strict_space) ~ special_ident ~ rep(strict_space) ^^
   277         { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) }
   300         { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) }
   278 
   301 
   279     private val item_body =
   302     private val item_body =
   280       delimited | (recover_delimited | other_token)
   303       delimited | (recover_delimited | other_token)
   281 
   304 
   283       (item_start ~ left_brace ~ rep(item_body) ~ opt(right_brace) |
   306       (item_start ~ left_brace ~ rep(item_body) ~ opt(right_brace) |
   284        item_start ~ left_paren ~ rep(item_body) ~ opt(right_paren)) ^^
   307        item_start ~ left_paren ~ rep(item_body) ~ opt(right_paren)) ^^
   285         { case (kind, a) ~ b ~ c ~ d => Item(kind, a ::: List(b) ::: c ::: d.toList) }
   308         { case (kind, a) ~ b ~ c ~ d => Item(kind, a ::: List(b) ::: c ::: d.toList) }
   286 
   309 
   287     private val recover_item: Parser[Item] =
   310     private val recover_item: Parser[Item] =
   288       at ~ "(?m)[^@]+".r ^^ { case a ~ b => Item("", List(a, Token(Token.Kind.ERROR, b))) }
   311       at ~ "(?m)[^@]*".r ^^ { case a ~ b => Item("", List(a, Token(Token.Kind.ERROR, b))) }
   289 
   312 
   290     def item_line(ctxt: Line_Context): Parser[(Item, Line_Context)] =
   313     def item_line(ctxt: Line_Context): Parser[(Item, Line_Context)] =
   291     {
   314     {
   292       ctxt match {
   315       ctxt match {
   293         case Ignored_Context =>
   316         case Ignored_Context =>
   294           item_start ~ (left_brace | left_paren) ^^
   317           item_start ~ (left_brace | left_paren) ^^
   295             { case (kind, a) ~ b =>
   318             { case (kind, a) ~ b =>
   296                 val right = if (b.source == "{") "}" else ")"
   319                 val right = if (b.source == "{") "}" else ")"
   297                 (Item(kind, a ::: List(b)), Item_Context(kind, Closed, right)) }
   320                 (Item(kind, a ::: List(b)), Item_Context(kind, Closed, right)) } |
       
   321           recover_item ^^ { case a => (a, Ignored_Context) }
   298         case Item_Context(kind, delim, right) =>
   322         case Item_Context(kind, delim, right) =>
   299           if (delim.depth > 0)
   323           if (delim.depth > 0)
   300             delimited_line(ctxt)
   324             delimited_line(ctxt)
   301           else {
   325           else {
   302             delimited_line(ctxt) |
   326             delimited_line(ctxt) |