src/Pure/Isar/token.scala
author wenzelm
Sat, 18 Jan 2014 19:15:12 +0100
changeset 55033 8e8243975860
parent 51048 123be08eed88
child 55035 68afbb5ce4ff
permissions -rw-r--r--
support for nested text cartouches; clarified Symbol.is_symbolic: exclude \<open> and \<close>;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
     1
/*  Title:      Pure/Isar/token.scala
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     3
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
     4
Outer token syntax for Isabelle/Isar.
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     5
*/
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     6
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     7
package isabelle
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     8
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     9
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    10
object Token
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    11
{
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    12
  /* tokens */
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    13
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    14
  object Kind extends Enumeration
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    15
  {
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    16
    val COMMAND = Value("command")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    17
    val KEYWORD = Value("keyword")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    18
    val IDENT = Value("identifier")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    19
    val LONG_IDENT = Value("long identifier")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    20
    val SYM_IDENT = Value("symbolic identifier")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    21
    val VAR = Value("schematic variable")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    22
    val TYPE_IDENT = Value("type variable")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    23
    val TYPE_VAR = Value("schematic type variable")
40290
47f572aff50a support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents: 38367
diff changeset
    24
    val NAT = Value("natural number")
47f572aff50a support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents: 38367
diff changeset
    25
    val FLOAT = Value("floating-point number")
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    26
    val STRING = Value("string")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    27
    val ALT_STRING = Value("back-quoted string")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    28
    val VERBATIM = Value("verbatim text")
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 51048
diff changeset
    29
    val CARTOUCHE = Value("cartouche")
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    30
    val SPACE = Value("white space")
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    31
    val COMMENT = Value("comment text")
48754
c2c1e5944536 clarified undefined, unparsed, unfinished command spans;
wenzelm
parents: 48718
diff changeset
    32
    val ERROR = Value("bad input")
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    33
    val UNPARSED = Value("unparsed input")
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    34
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    35
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    36
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    37
  /* token reader */
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    38
48335
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
    39
  class Position(val line: Int, val file: String) extends scala.util.parsing.input.Position
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    40
  {
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    41
    def column = 0
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    42
    def lineContents = ""
48335
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
    43
    override def toString =
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
    44
      if (file == "") ("line " + line.toString)
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
    45
      else ("line " + line.toString + " of " + quote(file))
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    46
48335
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
    47
    def advance(token: Token): Position =
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    48
    {
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    49
      var n = 0
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    50
      for (c <- token.content if c == '\n') n += 1
48335
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
    51
      if (n == 0) this else new Position(line + n, file)
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    52
    }
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    53
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    54
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    55
  abstract class Reader extends scala.util.parsing.input.Reader[Token]
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    56
48335
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
    57
  private class Token_Reader(tokens: List[Token], val pos: Position) extends Reader
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    58
  {
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    59
    def first = tokens.head
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    60
    def rest = new Token_Reader(tokens.tail, pos.advance(first))
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    61
    def atEnd = tokens.isEmpty
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    62
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    63
48335
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
    64
  def reader(tokens: List[Token], file: String = ""): Reader =
2f923e994056 more informative errors;
wenzelm
parents: 47012
diff changeset
    65
    new Token_Reader(tokens, new Position(1, file))
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    66
}
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    67
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    68
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    69
sealed case class Token(val kind: Token.Kind.Value, val source: String)
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    70
{
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    71
  def is_command: Boolean = kind == Token.Kind.COMMAND
48718
73e6c22e2d94 more structural parsing for minor modes;
wenzelm
parents: 48605
diff changeset
    72
  def is_keyword: Boolean = kind == Token.Kind.KEYWORD
73e6c22e2d94 more structural parsing for minor modes;
wenzelm
parents: 48605
diff changeset
    73
  def is_operator: Boolean = is_keyword && !Symbol.is_ascii_identifier(source)
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    74
  def is_delimited: Boolean =
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    75
    kind == Token.Kind.STRING ||
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    76
    kind == Token.Kind.ALT_STRING ||
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    77
    kind == Token.Kind.VERBATIM ||
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 51048
diff changeset
    78
    kind == Token.Kind.CARTOUCHE ||
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    79
    kind == Token.Kind.COMMENT
48365
d88aefda01c4 basic support for stand-alone options with external string representation;
wenzelm
parents: 48349
diff changeset
    80
  def is_ident: Boolean = kind == Token.Kind.IDENT
48605
e777363440d6 allow negative int values as well, according to real = int | float;
wenzelm
parents: 48599
diff changeset
    81
  def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT
46943
ac1c41ea856d clarified syntax of prospective keywords;
wenzelm
parents: 43611
diff changeset
    82
  def is_string: Boolean = kind == Token.Kind.STRING
48349
a78e5d399599 support Session.Queue with ordering and dependencies;
wenzelm
parents: 48335
diff changeset
    83
  def is_nat: Boolean = kind == Token.Kind.NAT
48365
d88aefda01c4 basic support for stand-alone options with external string representation;
wenzelm
parents: 48349
diff changeset
    84
  def is_float: Boolean = kind == Token.Kind.FLOAT
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    85
  def is_name: Boolean =
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    86
    kind == Token.Kind.IDENT ||
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    87
    kind == Token.Kind.SYM_IDENT ||
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    88
    kind == Token.Kind.STRING ||
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    89
    kind == Token.Kind.NAT
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    90
  def is_xname: Boolean = is_name || kind == Token.Kind.LONG_IDENT
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    91
  def is_text: Boolean = is_xname || kind == Token.Kind.VERBATIM
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    92
  def is_space: Boolean = kind == Token.Kind.SPACE
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    93
  def is_comment: Boolean = kind == Token.Kind.COMMENT
51048
123be08eed88 clarified notion of Command.proper_range (according to Token.is_proper), especially relevant for Active.try_replace_command, to avoid loosing subsequent comments accidentally;
wenzelm
parents: 48754
diff changeset
    94
  def is_improper: Boolean = is_space || is_comment
48599
5e64b7770f35 tuned signature;
wenzelm
parents: 48365
diff changeset
    95
  def is_proper: Boolean = !is_space && !is_comment
48754
c2c1e5944536 clarified undefined, unparsed, unfinished command spans;
wenzelm
parents: 48718
diff changeset
    96
  def is_error: Boolean = kind == Token.Kind.ERROR
47012
0e246130486b clarified command span classification: strict Command.is_command, permissive Command.name;
wenzelm
parents: 46943
diff changeset
    97
  def is_unparsed: Boolean = kind == Token.Kind.UNPARSED
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    98
48754
c2c1e5944536 clarified undefined, unparsed, unfinished command spans;
wenzelm
parents: 48718
diff changeset
    99
  def is_unfinished: Boolean = is_error &&
c2c1e5944536 clarified undefined, unparsed, unfinished command spans;
wenzelm
parents: 48718
diff changeset
   100
   (source.startsWith("\"") ||
c2c1e5944536 clarified undefined, unparsed, unfinished command spans;
wenzelm
parents: 48718
diff changeset
   101
    source.startsWith("`") ||
c2c1e5944536 clarified undefined, unparsed, unfinished command spans;
wenzelm
parents: 48718
diff changeset
   102
    source.startsWith("{*") ||
c2c1e5944536 clarified undefined, unparsed, unfinished command spans;
wenzelm
parents: 48718
diff changeset
   103
    source.startsWith("(*"))
c2c1e5944536 clarified undefined, unparsed, unfinished command spans;
wenzelm
parents: 48718
diff changeset
   104
48718
73e6c22e2d94 more structural parsing for minor modes;
wenzelm
parents: 48605
diff changeset
   105
  def is_begin: Boolean = is_keyword && source == "begin"
73e6c22e2d94 more structural parsing for minor modes;
wenzelm
parents: 48605
diff changeset
   106
  def is_end: Boolean = is_keyword && source == "end"
43611
21a57a0c5f25 more general theory header parsing;
wenzelm
parents: 43430
diff changeset
   107
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   108
  def content: String =
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   109
    if (kind == Token.Kind.STRING) Scan.Lexicon.empty.quoted_content("\"", source)
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   110
    else if (kind == Token.Kind.ALT_STRING) Scan.Lexicon.empty.quoted_content("`", source)
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   111
    else if (kind == Token.Kind.VERBATIM) Scan.Lexicon.empty.verbatim_content(source)
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 51048
diff changeset
   112
    else if (kind == Token.Kind.CARTOUCHE) Scan.Lexicon.empty.cartouche_content(source)
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   113
    else if (kind == Token.Kind.COMMENT) Scan.Lexicon.empty.comment_content(source)
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   114
    else source
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   115
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   116
  def text: (String, String) =
48718
73e6c22e2d94 more structural parsing for minor modes;
wenzelm
parents: 48605
diff changeset
   117
    if (is_command && source == ";") ("terminator", "")
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   118
    else (kind.toString, source)
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   119
}
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   120