src/Pure/Isar/outer_lex.scala
author wenzelm
Tue, 22 Dec 2009 14:58:13 +0100
changeset 34157 0a0a19153626
parent 34143 ded454429df3
child 34265 dc932fc1b906
permissions -rw-r--r--
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure; added Token_Reader; tuned;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/Isar/outer_lex.scala
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     3
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     4
Outer lexical syntax for Isabelle/Isar.
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     5
*/
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     6
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     7
package isabelle
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     8
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     9
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    10
object Outer_Lex
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    11
{
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    12
  /* tokens */
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    13
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    14
  /**
   * Explicit classification of outer-syntax tokens.
   *
   * Each value carries a human-readable name (its `toString`), which
   * `Token.text` uses when describing a token.  The declaration order
   * fixes the numeric ids, so new kinds should be appended at the end.
   */
  object Token_Kind extends Enumeration
  {
    // commands and keywords
    val COMMAND = Value("command")
    val KEYWORD = Value("keyword")

    // identifiers and variables
    val IDENT = Value("identifier")
    val LONG_IDENT = Value("long identifier")
    val SYM_IDENT = Value("symbolic identifier")
    val VAR = Value("schematic variable")
    val TYPE_IDENT = Value("type variable")
    val TYPE_VAR = Value("schematic type variable")
    val NAT = Value("number")

    // delimited text (see Token.is_delimited), apart from COMMENT below
    val STRING = Value("string")
    val ALT_STRING = Value("back-quoted string")
    val VERBATIM = Value("verbatim text")

    // improper tokens (see Token.is_proper)
    val SPACE = Value("white space")
    val COMMENT = Value("comment text")

    // input that could not be classified as any of the above
    val BAD_INPUT = Value("bad input")
    val UNPARSED = Value("unparsed input")
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    33
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    34
  /**
   * A single outer-syntax token.
   *
   * @param kind    classification of the token
   * @param source  the token text as passed in by the lexer, including any
   *                delimiters (`content` is what removes them)
   */
  sealed case class Token(kind: Token_Kind.Value, source: String)
  {
    /** True for strings, back-quoted strings, verbatim text and comments. */
    def is_delimited: Boolean =
      kind == Token_Kind.STRING ||
      kind == Token_Kind.ALT_STRING ||
      kind == Token_Kind.VERBATIM ||
      kind == Token_Kind.COMMENT

    /** True for identifiers, symbolic identifiers, strings and numbers. */
    def is_name: Boolean =
      kind == Token_Kind.IDENT ||
      kind == Token_Kind.SYM_IDENT ||
      kind == Token_Kind.STRING ||
      kind == Token_Kind.NAT

    /** Like `is_name`, but additionally accepts long identifiers. */
    def is_xname: Boolean = is_name || kind == Token_Kind.LONG_IDENT

    /** Like `is_xname`, but additionally accepts verbatim text. */
    def is_text: Boolean = is_xname || kind == Token_Kind.VERBATIM

    def is_space: Boolean = kind == Token_Kind.SPACE
    def is_comment: Boolean = kind == Token_Kind.COMMENT

    /** A proper token is anything that is neither white space nor a comment. */
    def is_proper: Boolean = !(is_space || is_comment)

    /**
     * The token text as processed for its kind.
     *
     * Delimited kinds are handed to the matching `Scan.Lexicon` helper
     * (defined outside this file; presumably these strip the delimiters --
     * TODO confirm against Scan); every other kind yields `source` unchanged.
     */
    def content: String =
      kind match {
        case Token_Kind.STRING => Scan.Lexicon.empty.quoted_content("\"", source)
        case Token_Kind.ALT_STRING => Scan.Lexicon.empty.quoted_content("`", source)
        case Token_Kind.VERBATIM => Scan.Lexicon.empty.verbatim_content(source)
        case Token_Kind.COMMENT => Scan.Lexicon.empty.comment_content(source)
        case _ => source
      }

    /**
     * Pair of (description of the kind, token text).
     *
     * The command ";" is special-cased as ("terminator", ""); every other
     * token yields the name of its kind together with its source.
     */
    def text: (String, String) =
      if (kind == Token_Kind.COMMAND && source == ";") ("terminator", "")
      else (kind.toString, source)
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    63
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    64
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    65
  /* token reader */
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    66
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    67
  /**
   * Parser position that records only a line number.
   *
   * The column is always 0 and the line contents are always empty, so
   * this position is rendered simply as its line number.
   *
   * @param line  the line number held by this position
   */
  class Line_Position(val line: Int) extends scala.util.parsing.input.Position
  {
    def column = 0
    def lineContents = ""
    override def toString = line.toString

    /**
     * Position after `token`: the line number grows by the number of
     * newline characters in the token's source.  Returns `this` when the
     * token contains no newline.
     *
     * Newlines are counted in the raw `source`, not in `content`.  The
     * content of delimited tokens is computed by Scan helpers outside this
     * file and need not have the same line structure as the input
     * (NOTE(review): e.g. decoded escape sequences -- confirm against Scan).
     * Counting in `source` also avoids running that processing merely to
     * track line numbers.
     */
    def advance(token: Token): Line_Position =
    {
      val n = token.source.count(_ == '\n')
      if (n == 0) this else new Line_Position(line + n)
    }
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    80
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    81
  /* Public reader type over Token, returned by reader() below; it adds no members of its own. */
  abstract class Reader extends scala.util.parsing.input.Reader[Token]
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    82
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    83
  /**
   * Reader over an in-memory list of tokens.
   *
   * @param tokens  the tokens still to be read
   * @param pos     position of the first remaining token
   */
  private class Token_Reader(tokens: List[Token], val pos: Line_Position) extends Reader
  {
    /** The next token; must not be called when `atEnd` (the list is empty). */
    def first: Token = tokens.head

    /**
     * Reader for the remaining tokens, positioned after `first`.
     *
     * At the end of input this returns `this`, as the contract of
     * scala.util.parsing.input.Reader requires, instead of failing on the
     * empty list.
     */
    def rest: Token_Reader =
      if (atEnd) this
      else new Token_Reader(tokens.tail, pos.advance(first))

    def atEnd: Boolean = tokens.isEmpty
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    89
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    90
  /* Make a Reader over the given tokens; positions start counting at line 1. */
  def reader(tokens: List[Token]): Reader =
  {
    val start = new Line_Position(1)
    new Token_Reader(tokens, start)
  }
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    91
}
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    92