src/Pure/Isar/token.scala
author      wenzelm
date        Sun Mar 15 21:57:10 2015 +0100 (2015-03-15)
changeset   59706 bf6ca55aae13
parent      59705 740a0ca7e09b
child       59715 4f0d0e4ad68d
permissions -rw-r--r--
proper command id for inlined errors, which is important for Command.State.accumulate;
/*  Title:      Pure/Isar/token.scala
    Author:     Makarius

Outer token syntax for Isabelle/Isar.
*/

package isabelle


import scala.collection.mutable
import scala.util.parsing.input


object Token
{
  /* tokens */

  object Kind extends Enumeration
  {
    /*immediate source*/
    val COMMAND = Value("command")
    val KEYWORD = Value("keyword")
    val IDENT = Value("identifier")
    val LONG_IDENT = Value("long identifier")
    val SYM_IDENT = Value("symbolic identifier")
    val VAR = Value("schematic variable")
    val TYPE_IDENT = Value("type variable")
    val TYPE_VAR = Value("schematic type variable")
    val NAT = Value("natural number")
    val FLOAT = Value("floating-point number")
    val SPACE = Value("white space")
    /*delimited content*/
    val STRING = Value("string")
    val ALT_STRING = Value("back-quoted string")
    val VERBATIM = Value("verbatim text")
    val CARTOUCHE = Value("text cartouche")
    val COMMENT = Value("comment text")
    /*special content*/
    val ERROR = Value("bad input")
    val UNPARSED = Value("unparsed input")
  }


  /* parsers */

  object Parsers extends Parsers

  trait Parsers extends Scan.Parsers
  {
    private def delimited_token: Parser[Token] =
    {
      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
      val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
      val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x))
      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))

      string | (alt_string | (verb | (cart | cmt)))
    }
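
    /* Remark (added, not in the original file): the delimited forms are
       "..." strings, `...` back-quoted strings, {* ... *} verbatim text,
       \<open>...\<close> cartouches, and (* ... *) comments; each is scanned
       by the corresponding combinator inherited from Scan.Parsers, which
       handles nesting and quoting within the delimiters. */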

    private def other_token(keywords: Keyword.Keywords): Parser[Token] =
    {
      val letdigs1 = many1(Symbol.is_letdig)
      val sub = one(s => s == Symbol.sub_decoded || s == "\\<^sub>")
      val id =
        one(Symbol.is_letter) ~
          (rep(letdigs1 | (sub ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^
        { case x ~ y => x + y }

      val nat = many1(Symbol.is_digit)
      val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z }
      val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }

      val ident = id ~ rep("." ~> id) ^^
        { case x ~ Nil => Token(Token.Kind.IDENT, x)
          case x ~ ys => Token(Token.Kind.LONG_IDENT, (x :: ys).mkString(".")) }

      val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.VAR, x + y) }
      val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token.Kind.TYPE_IDENT, x + y) }
      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.TYPE_VAR, x + y) }
      val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x))
      val float =
        ("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x))

      val sym_ident =
        (many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^
        (x => Token(Token.Kind.SYM_IDENT, x))

      val keyword =
        literal(keywords.minor) ^^ (x => Token(Token.Kind.KEYWORD, x)) |||
        literal(keywords.major) ^^ (x => Token(Token.Kind.COMMAND, x))

      val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))

      val recover_delimited =
        (recover_quoted("\"") |
          (recover_quoted("`") |
            (recover_verbatim |
              (recover_cartouche | recover_comment)))) ^^ (x => Token(Token.Kind.ERROR, x))

      val bad = one(_ => true) ^^ (x => Token(Token.Kind.ERROR, x))

      space | (recover_delimited |
        (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
          keyword) | bad))
    }
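
    /* Note (added, not in the original file): `|||` is the longest-match
       alternative of Scala parser combinators, so an identifier that merely
       shares a prefix with a keyword is still scanned in full, whereas plain
       `|` commits to the first branch that succeeds.  The `recover_*`
       parsers absorb unterminated delimited material as a single ERROR
       token instead of failing outright. */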

    def token(keywords: Keyword.Keywords): Parser[Token] =
      delimited_token | other_token(keywords)

    def token_line(keywords: Keyword.Keywords, ctxt: Scan.Line_Context)
      : Parser[(Token, Scan.Line_Context)] =
    {
      val string =
        quoted_line("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
      val alt_string =
        quoted_line("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
      val verb = verbatim_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
      val cart = cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) }
      val cmt = comment_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.COMMENT, x), c) }
      val other = other_token(keywords) ^^ { case x => (x, Scan.Finished) }

      string | (alt_string | (verb | (cart | (cmt | other))))
    }
  }


  /* explode */

  def explode(keywords: Keyword.Keywords, inp: CharSequence): List[Token] =
  {
    val in: input.Reader[Char] = new input.CharSequenceReader(inp)
    Parsers.parseAll(Parsers.rep(Parsers.token(keywords)), in) match {
      case Parsers.Success(tokens, _) => tokens
      case _ => error("Unexpected failure of tokenizing input:\n" + inp.toString)
    }
  }
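
  /* Usage sketch (added, not in the original file; `keywords` is assumed
     to stem from the relevant Outer_Syntax):

       val toks = Token.explode(keywords, "lemma xy: \"x = y\"")
       val proper = toks.filter(_.is_proper)  // drop white space and comments
  */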

  def explode_line(keywords: Keyword.Keywords, inp: CharSequence, context: Scan.Line_Context)
    : (List[Token], Scan.Line_Context) =
  {
    var in: input.Reader[Char] = new input.CharSequenceReader(inp)
    val toks = new mutable.ListBuffer[Token]
    var ctxt = context
    while (!in.atEnd) {
      Parsers.parse(Parsers.token_line(keywords, ctxt), in) match {
        case Parsers.Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
        case Parsers.NoSuccess(_, rest) =>
          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
      }
    }
    (toks.toList, ctxt)
  }
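
  /* Usage sketch (added, not in the original file): scan a buffer line by
     line, threading the scanner context so that delimited tokens may span
     multiple lines; `keywords` and `lines` are assumed:

       var ctxt: Scan.Line_Context = Scan.Finished
       for (line <- lines) {
         val (toks, ctxt1) = Token.explode_line(keywords, line, ctxt)
         ctxt = ctxt1
       }
  */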


  /* token reader */

  object Pos
  {
    val none: Pos = new Pos(0, 0, "", "")
    val start: Pos = new Pos(1, 1, "", "")
    def file(file: String): Pos = new Pos(1, 1, file, "")
    def id(id: String): Pos = new Pos(0, 1, "", id)
  }

  final class Pos private[Token](
      val line: Int,
      val offset: Symbol.Offset,
      val file: String,
      val id: String)
    extends scala.util.parsing.input.Position
  {
    def column = 0
    def lineContents = ""

    def advance(token: Token): Pos =
    {
      var line1 = line
      var offset1 = offset
      for (s <- Symbol.iterator(token.source)) {
        if (line1 > 0 && Symbol.is_newline(s)) line1 += 1
        if (offset1 > 0) offset1 += 1
      }
      if (line1 == line && offset1 == offset) this
      else new Pos(line1, offset1, file, id)
    }

    private def position(end_offset: Symbol.Offset): Position.T =
      (if (line > 0) Position.Line(line) else Nil) :::
      (if (offset > 0) Position.Offset(offset) else Nil) :::
      (if (end_offset > 0) Position.End_Offset(end_offset) else Nil) :::
      (if (file != "") Position.File(file) else Nil) :::
      (if (id != "") Position.Id_String(id) else Nil)

    def position(): Position.T = position(0)
    def position(token: Token): Position.T = position(advance(token).offset)

    override def toString: String = Position.here_undelimited(position())
  }
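
  /* Note (added, not in the original file): `advance` counts Isabelle
     symbols via Symbol.iterator, not Java characters, so a decoded symbol
     advances the offset by exactly one.  A zero line or offset means
     "unspecified" and is never advanced; Pos.id thus tracks offsets within
     a single command without maintaining line numbers. */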

  abstract class Reader extends scala.util.parsing.input.Reader[Token]

  private class Token_Reader(tokens: List[Token], val pos: Pos) extends Reader
  {
    def first = tokens.head
    def rest = new Token_Reader(tokens.tail, pos.advance(first))
    def atEnd = tokens.isEmpty
  }

  def reader(tokens: List[Token], start: Token.Pos): Reader =
    new Token_Reader(tokens, start)
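
  /* Usage sketch (added, not in the original file; `keywords` and
     `command_id` are assumed names):

       val in = Token.reader(Token.explode(keywords, src), Token.Pos.id(command_id))

     Starting from Pos.id makes positions carry the command id, so errors
     reported against this reader refer to the proper command (cf. the
     changeset message above). */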
}


sealed case class Token(kind: Token.Kind.Value, source: String)
{
  def is_command: Boolean = kind == Token.Kind.COMMAND
  def is_command_kind(keywords: Keyword.Keywords, pred: String => Boolean): Boolean =
    is_command && keywords.is_command_kind(source, pred)
  def is_keyword: Boolean = kind == Token.Kind.KEYWORD
  def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source)
  def is_ident: Boolean = kind == Token.Kind.IDENT
  def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT
  def is_string: Boolean = kind == Token.Kind.STRING
  def is_nat: Boolean = kind == Token.Kind.NAT
  def is_float: Boolean = kind == Token.Kind.FLOAT
  def is_name: Boolean =
    kind == Token.Kind.IDENT ||
    kind == Token.Kind.SYM_IDENT ||
    kind == Token.Kind.STRING ||
    kind == Token.Kind.NAT
  def is_xname: Boolean = is_name || kind == Token.Kind.LONG_IDENT
  def is_text: Boolean = is_xname || kind == Token.Kind.VERBATIM || kind == Token.Kind.CARTOUCHE
  def is_space: Boolean = kind == Token.Kind.SPACE
  def is_comment: Boolean = kind == Token.Kind.COMMENT
  def is_improper: Boolean = is_space || is_comment
  def is_proper: Boolean = !is_space && !is_comment
  def is_error: Boolean = kind == Token.Kind.ERROR
  def is_unparsed: Boolean = kind == Token.Kind.UNPARSED

  def is_unfinished: Boolean = is_error &&
   (source.startsWith("\"") ||
    source.startsWith("`") ||
    source.startsWith("{*") ||
    source.startsWith("(*") ||
    source.startsWith(Symbol.open) ||
    source.startsWith(Symbol.open_decoded))

  def is_begin: Boolean = is_keyword && source == "begin"
  def is_end: Boolean = is_command && source == "end"

  def is_begin_block: Boolean = is_command && source == "{"
  def is_end_block: Boolean = is_command && source == "}"

  def content: String =
    if (kind == Token.Kind.STRING) Scan.Parsers.quoted_content("\"", source)
    else if (kind == Token.Kind.ALT_STRING) Scan.Parsers.quoted_content("`", source)
    else if (kind == Token.Kind.VERBATIM) Scan.Parsers.verbatim_content(source)
    else if (kind == Token.Kind.CARTOUCHE) Scan.Parsers.cartouche_content(source)
    else if (kind == Token.Kind.COMMENT) Scan.Parsers.comment_content(source)
    else source
}
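
/* Example (added, not in the original file): `content` strips the
   delimiters of quoted forms and leaves other sources unchanged:

     Token(Token.Kind.STRING, "\"abc\"").content   // "abc"
     Token(Token.Kind.KEYWORD, "begin").content    // "begin"
*/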