src/Pure/Isar/token.scala
author wenzelm
Fri, 11 Apr 2014 09:36:38 +0200
changeset 56532 3da244bc02bd
parent 56464 555f4be59be6
child 56998 ebf3c9681406
permissions -rw-r--r--
tuned message, to accommodate extra brackets produced by Scala parsers;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
     1
/*  Title:      Pure/Isar/token.scala
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     3
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
     4
Outer token syntax for Isabelle/Isar.
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     5
*/
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     6
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     7
package isabelle
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     8
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     9
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    10
object Token
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    11
{
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    12
  /* tokens */
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    13
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    14
  object Kind extends Enumeration
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    15
  {
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    16
    val COMMAND = Value("command")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    17
    val KEYWORD = Value("keyword")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    18
    val IDENT = Value("identifier")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    19
    val LONG_IDENT = Value("long identifier")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    20
    val SYM_IDENT = Value("symbolic identifier")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    21
    val VAR = Value("schematic variable")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    22
    val TYPE_IDENT = Value("type variable")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    23
    val TYPE_VAR = Value("schematic type variable")
40290
47f572aff50a support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents: 38367
diff changeset
    24
    val NAT = Value("natural number")
47f572aff50a support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents: 38367
diff changeset
    25
    val FLOAT = Value("floating-point number")
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    26
    val STRING = Value("string")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    27
    val ALT_STRING = Value("back-quoted string")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    28
    val VERBATIM = Value("verbatim text")
55512
75c68e05f9ea support ML antiquotations in Scala;
wenzelm
parents: 55510
diff changeset
    29
    val CARTOUCHE = Value("text cartouche")
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    30
    val SPACE = Value("white space")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    31
    val COMMENT = Value("comment text")
48754
c2c1e5944536 clarified undefined, unparsed, unfinished command spans;
wenzelm
parents: 48718
diff changeset
    32
    val ERROR = Value("bad input")
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    33
    val UNPARSED = Value("unparsed input")
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    34
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    35
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    36
55494
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    37
  /* parsers */
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    38
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    39
  object Parsers extends Parsers
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    40
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    41
  trait Parsers extends Scan.Parsers
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    42
  {
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    43
    private def delimited_token: Parser[Token] =
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    44
    {
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    45
      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    46
      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    47
      val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    48
      val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x))
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    49
      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    50
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    51
      string | (alt_string | (verb | (cart | cmt)))
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    52
    }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    53
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    54
    private def other_token(lexicon: Scan.Lexicon, is_command: String => Boolean)
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    55
      : Parser[Token] =
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    56
    {
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    57
      val letdigs1 = many1(Symbol.is_letdig)
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    58
      val sub = one(s => s == Symbol.sub_decoded || s == "\\<^sub>")
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    59
      val id =
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    60
        one(Symbol.is_letter) ~
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    61
          (rep(letdigs1 | (sub ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    62
        { case x ~ y => x + y }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    63
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    64
      val nat = many1(Symbol.is_digit)
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    65
      val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    66
      val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    67
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    68
      val ident = id ~ rep("." ~> id) ^^
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    69
        { case x ~ Nil => Token(Token.Kind.IDENT, x)
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    70
          case x ~ ys => Token(Token.Kind.LONG_IDENT, (x :: ys).mkString(".")) }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    71
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    72
      val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.VAR, x + y) }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    73
      val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token.Kind.TYPE_IDENT, x + y) }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    74
      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.TYPE_VAR, x + y) }
009b71c1ed23 tuned signature (in accordance to ML version);