src/Pure/Isar/token.scala
author wenzelm
Wed, 02 Jul 2014 12:12:26 +0200
changeset 57486 2131b6633529
parent 57021 6a8fd2ac6756
child 58751 6de7dbaf3c44
permissions -rw-r--r--
check 'case' variable bindings as for 'fix', which means internal names are rejected as usual;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
     1
/*  Title:      Pure/Isar/token.scala
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     3
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
     4
Outer token syntax for Isabelle/Isar.
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     5
*/
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     6
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     7
package isabelle
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     8
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     9
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    10
/**
 * Companion of the Token case class: token kinds, token parsers, and token
 * readers with line-based positions.
 */
object Token
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    11
{
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    12
  /* tokens */
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    13
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    14
  /**
   * Kinds of outer-syntax tokens.
   *
   * Every value is created with an explicit name; that name is what
   * `toString` yields for the kind (see `Token.text`).  Declaration order
   * determines the numeric ids, so new kinds should be appended with care.
   */
  object Kind extends Enumeration
  {
    // keywords and identifiers
    val COMMAND = Value("command")
    val KEYWORD = Value("keyword")
    val IDENT = Value("identifier")
    val LONG_IDENT = Value("long identifier")
    val SYM_IDENT = Value("symbolic identifier")

    // variables
    val VAR = Value("schematic variable")
    val TYPE_IDENT = Value("type variable")
    val TYPE_VAR = Value("schematic type variable")

    // numbers
    val NAT = Value("natural number")
    val FLOAT = Value("floating-point number")

    // delimited text
    val STRING = Value("string")
    val ALT_STRING = Value("back-quoted string")
    val VERBATIM = Value("verbatim text")
    val CARTOUCHE = Value("text cartouche")

    // improper tokens
    val SPACE = Value("white space")
    val COMMENT = Value("comment text")

    // malformed or unprocessed input
    val ERROR = Value("bad input")
    val UNPARSED = Value("unparsed input")
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    35
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    36
55494
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    37
  /* parsers */
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    38
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    39
  /** Stand-alone instance of the `Parsers` trait, usable without mixing the trait in. */
  object Parsers extends Parsers
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    40
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    41
  /**
   * Token parsers built on top of `Scan.Parsers`: `token` scans one complete
   * token, `token_line` scans one token relative to an explicit
   * `Scan.Line_Context`.
   */
  trait Parsers extends Scan.Parsers
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    42
  {
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    43
    /**
     * Parser for complete delimited tokens: strings, back-quoted strings,
     * verbatim text, cartouches and comments.
     *
     * The scanned text becomes the token source unchanged; only the kind
     * differs between the alternatives.
     */
    private def delimited_token: Parser[Token] =
    {
      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
      val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
      val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x))
      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))

      // alternatives are tried left to right; nesting is kept explicit
      string | (alt_string | (verb | (cart | cmt)))
    }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    53
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    54
    /**
     * Parser for all tokens that are not complete delimited tokens:
     * white space, unfinished delimited text (as ERROR), identifiers,
     * variables, numbers, symbolic identifiers, commands/keywords, and
     * finally any single symbol as ERROR.
     *
     * @param lexicon     literals that are recognized as command or keyword
     * @param is_command  decides whether a recognized literal is a COMMAND
     *                    (otherwise it is a KEYWORD)
     */
    private def other_token(lexicon: Scan.Lexicon, is_command: String => Boolean)
      : Parser[Token] =
    {
      // identifier: a letter, then runs of letters/digits, each run
      // optionally introduced by a subscript symbol
      val letdigs1 = many1(Symbol.is_letdig)
      val sub = one(s => s == Symbol.sub_decoded || s == "\\<^sub>")
      val id =
        one(Symbol.is_letter) ~
          (rep(letdigs1 | (sub ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^
        { case x ~ y => x + y }

      // numbers and indexed names
      val nat = many1(Symbol.is_digit)
      val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z }
      val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }

      // plain identifier vs. dot-separated long identifier
      val ident = id ~ rep("." ~> id) ^^
        { case x ~ Nil => Token(Token.Kind.IDENT, x)
          case x ~ ys => Token(Token.Kind.LONG_IDENT, (x :: ys).mkString(".")) }

      val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.VAR, x + y) }
      val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token.Kind.TYPE_IDENT, x + y) }
      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.TYPE_VAR, x + y) }
      val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x))
      val float =
        ("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x))

      val sym_ident =
        (many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^
        (x => Token(Token.Kind.SYM_IDENT, x))

      // literal from the lexicon, classified by is_command
      val command_keyword =
        literal(lexicon) ^^
          (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))

      val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))

      // unfinished delimited text is turned into a single ERROR token
      val recover_delimited =
        (recover_quoted("\"") |
          (recover_quoted("`") |
            (recover_verbatim |
              (recover_cartouche | recover_comment)))) ^^ (x => Token(Token.Kind.ERROR, x))

      // last resort: any single symbol is an ERROR token
      val bad = one(_ => true) ^^ (x => Token(Token.Kind.ERROR, x))

      // NOTE(review): `|||` is presumably the longest-match alternative of the
      // Scala parser combinators, so a lexicon literal competes with the
      // identifier/number alternatives by length -- confirm against Scan.Parsers.
      space | (recover_delimited |
        (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
          command_keyword) | bad))
    }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   101
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   102
    /**
     * Parser for one token: a complete delimited token if possible, otherwise
     * whatever `other_token` recognizes for the given lexicon.
     *
     * @param lexicon     literals that are recognized as command or keyword
     * @param is_command  decides whether a recognized literal is a COMMAND
     */
    def token(lexicon: Scan.Lexicon, is_command: String => Boolean): Parser[Token] =
      delimited_token | other_token(lexicon, is_command)
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   104
55510
1585a65aad64 tuned signature -- emphasize line-oriented aspect;
wenzelm
parents: 55505
diff changeset
   105
    /**
     * Line-oriented variant of `token`: parser for one token together with
     * the line context that results from scanning it.
     *
     * Delimited tokens are scanned by the `*_line` parsers relative to `ctxt`
     * and carry the context those parsers return; every other token is paired
     * with `Scan.Finished`.
     *
     * @param lexicon     literals that are recognized as command or keyword
     * @param is_command  decides whether a recognized literal is a COMMAND
     * @param ctxt        line context in which scanning starts
     */
    def token_line(lexicon: Scan.Lexicon, is_command: String => Boolean, ctxt: Scan.Line_Context)
      : Parser[(Token, Scan.Line_Context)] =
    {
      val string =
        quoted_line("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
      val alt_string =
        quoted_line("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
      val verb = verbatim_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
      val cart = cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) }
      val cmt = comment_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.COMMENT, x), c) }
      val other = other_token(lexicon, is_command) ^^ (x => (x, Scan.Finished))

      string | (alt_string | (verb | (cart | (cmt | other))))
    }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   119
  }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   120
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   121
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
   122
  /* token reader */
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   123
56464
555f4be59be6 more precise token positions;
wenzelm
parents: 55512
diff changeset
   124
  /** Companion of class `Pos`. */
  object Pos
  {
    /** Position with line 0 and empty file name. */
    val none: Pos = new Pos(0, "")
  }
555f4be59be6 more precise token positions;
wenzelm
parents: 55512
diff changeset
   128
555f4be59be6 more precise token positions;
wenzelm
parents: 55512
diff changeset
   129
  /**
   * Line-based token position, for use with the Scala parser combinators.
   *
   * Only the line number and the file name are tracked; `column` and
   * `lineContents` are fixed dummy values.
   *
   * @param line  line number
   * @param file  file name (may be empty)
   */
  final class Pos private[Token](val line: Int, val file: String)
    extends scala.util.parsing.input.Position
  {
    def column: Int = 0
    def lineContents: String = ""

    /**
     * Position after the given token: the line number is increased by the
     * number of newline characters of the token.
     *
     * Newlines are counted in the raw `source`, not in the processed
     * `content`: the line number tracks physical lines of input text, and
     * `content` is derived via kind-specific helpers whose result need not
     * preserve the newline count of the original text.
     */
    def advance(token: Token): Pos =
    {
      val n = token.source.count(_ == '\n')
      if (n == 0) this else new Pos(line + n, file)
    }

    /** This position as `Position.T`, built via `Position.Line_File`. */
    def position: Position.T = Position.Line_File(line, file)
    override def toString: String = Position.here_undelimited(position)
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   145
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
   146
  /** Abstract reader over `Token` elements, as result type of `reader`. */
  abstract class Reader extends scala.util.parsing.input.Reader[Token]
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
   147
56464
555f4be59be6 more precise token positions;
wenzelm
parents: 55512
diff changeset
   148
  /**
   * Reader over a list of tokens, with a position that advances with each
   * consumed token.
   *
   * @param tokens  remaining tokens
   * @param pos     position of the first remaining token
   */
  private class Token_Reader(tokens: List[Token], val pos: Pos) extends Reader
  {
    /** First remaining token; must not be used when `atEnd`. */
    def first: Token = tokens.head

    /**
     * Reader for the remaining tokens after `first`.  At the end of input the
     * reader itself is returned, as stated by the contract of
     * `scala.util.parsing.input.Reader.rest`, instead of failing on the
     * empty list.
     */
    def rest: Token_Reader =
      if (tokens.isEmpty) this
      else new Token_Reader(tokens.tail, pos.advance(first))

    def atEnd: Boolean = tokens.isEmpty
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   154
48335
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
   155
  /**
   * Reader over the given tokens, starting at line 1 of the given file.
   *
   * @param tokens  tokens to be read
   * @param file    file name used for positions (default: empty)
   */
  def reader(tokens: List[Token], file: String = ""): Reader =
    new Token_Reader(tokens, new Pos(1, file))
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   157
}
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   158
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   159
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   160
/**
 * Outer-syntax token.
 *
 * @param kind    token kind
 * @param source  token text as scanned, including any delimiters
 */
sealed case class Token(kind: Token.Kind.Value, source: String)
{
  /* basic kinds */

  def is_command: Boolean = kind == Token.Kind.COMMAND
  def is_keyword: Boolean = kind == Token.Kind.KEYWORD
  /** Keyword whose source is not an ASCII identifier. */
  def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source)
  def is_ident: Boolean = kind == Token.Kind.IDENT
  def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT
  def is_string: Boolean = kind == Token.Kind.STRING
  def is_nat: Boolean = kind == Token.Kind.NAT
  def is_float: Boolean = kind == Token.Kind.FLOAT


  /* derived categories */

  /** Identifier, symbolic identifier, string, or natural number. */
  def is_name: Boolean = is_ident || is_sym_ident || is_string || is_nat
  /** Name or long identifier. */
  def is_xname: Boolean = is_name || kind == Token.Kind.LONG_IDENT
  /** Extended name, verbatim text, or cartouche. */
  def is_text: Boolean = is_xname || kind == Token.Kind.VERBATIM || kind == Token.Kind.CARTOUCHE
  def is_space: Boolean = kind == Token.Kind.SPACE
  def is_comment: Boolean = kind == Token.Kind.COMMENT
  /** White space or comment. */
  def is_improper: Boolean = is_space || is_comment
  def is_proper: Boolean = !is_improper
  def is_error: Boolean = kind == Token.Kind.ERROR
  def is_unparsed: Boolean = kind == Token.Kind.UNPARSED

  /** Error token whose source starts with the opening of some delimited text. */
  def is_unfinished: Boolean = is_error &&
   (source.startsWith("\"") ||
    source.startsWith("`") ||
    source.startsWith("{*") ||
    source.startsWith("(*") ||
    source.startsWith(Symbol.open) ||
    source.startsWith(Symbol.open_decoded))

  def is_begin: Boolean = is_keyword && source == "begin"
  def is_end: Boolean = is_keyword && source == "end"


  /* content */

  /**
   * Token content: for delimited kinds the result of the corresponding
   * `Scan.Parsers` content operation on the source, otherwise the source
   * itself.
   */
  def content: String =
    kind match {
      case Token.Kind.STRING => Scan.Parsers.quoted_content("\"", source)
      case Token.Kind.ALT_STRING => Scan.Parsers.quoted_content("`", source)
      case Token.Kind.VERBATIM => Scan.Parsers.verbatim_content(source)
      case Token.Kind.CARTOUCHE => Scan.Parsers.cartouche_content(source)
      case Token.Kind.COMMENT => Scan.Parsers.comment_content(source)
      case _ => source
    }

  /**
   * Pair of kind description and source text; the keyword ";" is reported
   * as "terminator" with empty text.
   */
  def text: (String, String) =
    if (is_keyword && source == ";") ("terminator", "")
    else (kind.toString, source)
}
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   207