src/Pure/Tools/bibtex.scala
changeset 58591 3c1a8c1c6b3b
parent 58590 472b9fbcc7f0
child 58596 877c5ecee253
equal deleted inserted replaced
58590:472b9fbcc7f0 58591:3c1a8c1c6b3b
   187   // context of partial line-oriented scans
   187   // context of partial line-oriented scans
   188   abstract class Line_Context
   188   abstract class Line_Context
   189   case object Ignored extends Line_Context
   189   case object Ignored extends Line_Context
   190   case object At extends Line_Context
   190   case object At extends Line_Context
   191   case class Item_Start(kind: String) extends Line_Context
   191   case class Item_Start(kind: String) extends Line_Context
   192   case class Item_Open(kind: String, right: String) extends Line_Context
   192   case class Item_Open(kind: String, end: String) extends Line_Context
   193   case class Item(kind: String, right: String, delim: Delimited) extends Line_Context
   193   case class Item(kind: String, end: String, delim: Delimited) extends Line_Context
   194 
   194 
   195   case class Delimited(quoted: Boolean, depth: Int)
   195   case class Delimited(quoted: Boolean, depth: Int)
   196   val Closed = Delimited(false, 0)
   196   val Closed = Delimited(false, 0)
   197 
   197 
   198   private def token(kind: Token.Kind.Value)(source: String): Token = Token(kind, source)
   198   private def token(kind: Token.Kind.Value)(source: String): Token = Token(kind, source)
   276 
   276 
   277 
   277 
   278     /* other tokens */
   278     /* other tokens */
   279 
   279 
   280     private val at = "@" ^^ keyword
   280     private val at = "@" ^^ keyword
   281     private val left_brace = "{" ^^ keyword
       
   282     private val right_brace = "}" ^^ keyword
       
   283     private val left_paren = "(" ^^ keyword
       
   284     private val right_paren = ")" ^^ keyword
       
   285 
   281 
   286     private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT)
   282     private val nat = "[0-9]+".r ^^ token(Token.Kind.NAT)
       
   283 
       
   284     private val name = """[\x21-\x7f&&[^"#%'(),={}]]+""".r ^^ token(Token.Kind.NAME)
   287 
   285 
   288     private val identifier =
   286     private val identifier =
   289       """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r
   287       """[\x21-\x7f&&[^"#%'(),={}0-9]][\x21-\x7f&&[^"#%'(),={}]]*""".r
   290 
   288 
   291     private val ident = identifier ^^ token(Token.Kind.IDENT)
   289     private val ident = identifier ^^ token(Token.Kind.IDENT)
   292 
   290 
   293     val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space))
   291     val other_token = "[=#,]".r ^^ keyword | (nat | (ident | space))
       
   292 
       
   293 
       
   294     /* body */
       
   295 
       
   296     private val body =
       
   297       delimited | (recover_delimited | other_token)
       
   298 
       
   299     private def body_line(ctxt: Item) =
       
   300       if (ctxt.delim.depth > 0)
       
   301         delimited_line(ctxt)
       
   302       else
       
   303         delimited_line(ctxt) |
       
   304         other_token ^^ { case a => (Chunk(ctxt.kind, List(a)), ctxt) } |
       
   305         ctxt.end ^^ { case a => (Chunk(ctxt.kind, List(keyword(a))), Ignored) }
   294 
   306 
   295 
   307 
   296     /* items: command or entry */
   308     /* items: command or entry */
   297 
   309 
   298     private val item_kind =
   310     private val item_kind =
   302           else if (is_entry(a)) Token.Kind.ENTRY
   314           else if (is_entry(a)) Token.Kind.ENTRY
   303           else Token.Kind.IDENT
   315           else Token.Kind.IDENT
   304         Token(kind, a)
   316         Token(kind, a)
   305       }
   317       }
   306 
   318 
       
   319     private val item_begin =
       
   320       "{" ^^ { case a => ("}", keyword(a)) } |
       
   321       "(" ^^ { case a => (")", keyword(a)) }
       
   322 
       
   323     private def item_name(kind: String) =
       
   324       kind.toLowerCase match {
       
   325         case "preamble" => failure("")
       
   326         case "string" => identifier ^^ token(Token.Kind.NAME)
       
   327         case _ => name
       
   328       }
       
   329 
   307     private val item_start =
   330     private val item_start =
   308       at ~ spaces ~ item_kind ~ spaces ^^
   331       at ~ spaces ~ item_kind ~ spaces ^^
   309         { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) }
   332         { case a ~ b ~ c ~ d => (c.source, List(a) ::: b ::: List(c) ::: d) }
   310 
   333 
   311     private val item_name = identifier ^^ token(Token.Kind.NAME)
       
   312 
       
   313     private val item_body =
       
   314       delimited | (recover_delimited | other_token)
       
   315 
       
   316     private val item: Parser[Chunk] =
   334     private val item: Parser[Chunk] =
   317       (item_start ~ left_brace ~ spaces ~ item_name ~ rep(item_body) ~ opt(right_brace) |
   335       (item_start ~ item_begin ~ spaces) into
   318        item_start ~ left_paren ~ spaces ~ item_name ~ rep(item_body) ~ opt(right_paren)) ^^
   336         { case (kind, a) ~ ((end, b)) ~ c =>
   319         { case (kind, a) ~ b ~ c ~ d ~ e ~ f =>
   337             opt(item_name(kind)) ~ rep(body) ~ opt(end ^^ keyword) ^^ {
   320             Chunk(kind, a ::: List(b) ::: c ::: List(d) ::: e ::: f.toList) }
   338               case d ~ e ~ f => Chunk(kind, a ::: List(b) ::: c ::: d.toList ::: e ::: f.toList) } }
   321 
       
   322 
       
   323     /* chunks */
       
   324 
   339 
   325     private val recover_item: Parser[Chunk] =
   340     private val recover_item: Parser[Chunk] =
   326       at ~ "(?m)[^@]*".r ^^ { case a ~ b => Chunk("", List(a, Token(Token.Kind.ERROR, b))) }
   341       at ~ "(?m)[^@]*".r ^^ { case a ~ b => Chunk("", List(a, Token(Token.Kind.ERROR, b))) }
   327 
   342 
   328     private def item_body_line(ctxt: Item) =
   343 
   329       if (ctxt.delim.depth > 0)
   344     /* chunks */
   330         delimited_line(ctxt)
       
   331       else
       
   332         delimited_line(ctxt) |
       
   333         other_token ^^ { case a => (Chunk(ctxt.kind, List(a)), ctxt) } |
       
   334         ctxt.right ^^ { case a => (Chunk(ctxt.kind, List(keyword(a))), Ignored) }
       
   335 
   345 
   336     val chunk: Parser[Chunk] = ignored | (item | recover_item)
   346     val chunk: Parser[Chunk] = ignored | (item | recover_item)
   337 
   347 
   338     def chunk_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] =
   348     def chunk_line(ctxt: Line_Context): Parser[(Chunk, Line_Context)] =
   339     {
   349     {
   348           recover_item ^^ { case a => (a, Ignored) } |
   358           recover_item ^^ { case a => (a, Ignored) } |
   349           ignored_line
   359           ignored_line
   350 
   360 
   351         case Item_Start(kind) =>
   361         case Item_Start(kind) =>
   352           space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
   362           space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
   353           left_brace ^^ { case a => (Chunk(kind, List(a)), Item_Open(kind, "}")) } |
   363           item_begin ^^ { case (end, a) => (Chunk(kind, List(a)), Item_Open(kind, end)) } |
   354           left_paren ^^ { case a => (Chunk(kind, List(a)), Item_Open(kind, ")")) } |
       
   355           ignored_line
   364           ignored_line
   356 
   365 
   357         case Item_Open(kind, right) =>
   366         case Item_Open(kind, end) =>
   358           space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
   367           space ^^ { case a => (Chunk(kind, List(a)), ctxt) } |
   359           item_name ^^ { case a => (Chunk(kind, List(a)), Item(kind, right, Closed)) } |
   368           item_name(kind) ^^ { case a => (Chunk(kind, List(a)), Item(kind, end, Closed)) } |
   360           item_body_line(Item(kind, right, Closed)) |
   369           body_line(Item(kind, end, Closed)) |
   361           ignored_line
   370           ignored_line
   362 
   371 
   363         case item_ctxt: Item =>
   372         case item_ctxt: Item =>
   364           item_body_line(item_ctxt) |
   373           body_line(item_ctxt) |
   365           ignored_line
   374           ignored_line
   366 
   375 
   367         case _ => failure("")
   376         case _ => failure("")
   368       }
   377       }
   369     }
   378     }