src/Pure/Isar/token.scala
author wenzelm
Fri, 20 Nov 2020 23:47:34 +0100
changeset 72669 5e7916535860
parent 71601 97ccf48c2f0c
child 72744 0017eb17ac1c
permissions -rw-r--r--
generate theory HTML in Isabelle/Scala; discontinued HTML support in Isabelle/ML;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
     1
/*  Title:      Pure/Isar/token.scala
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     3
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
     4
Outer token syntax for Isabelle/Isar.
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     5
*/
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     6
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     7
package isabelle
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     8
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
     9
59083
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
    10
import scala.collection.mutable
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
    11
import scala.util.parsing.input
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
    12
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
    13
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    14
object Token
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    15
{
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    16
  /* tokens */
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    17
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
    18
  // Enumeration of outer-syntax token kinds; each value carries a
  // human-readable description as its name.
  object Kind extends Enumeration
  {
    /*immediate source*/
    val COMMAND = Value("command")
    val KEYWORD = Value("keyword")
    val IDENT = Value("identifier")
    val LONG_IDENT = Value("long identifier")
    val SYM_IDENT = Value("symbolic identifier")
    val VAR = Value("schematic variable")
    val TYPE_IDENT = Value("type variable")
    val TYPE_VAR = Value("schematic type variable")
    val NAT = Value("natural number")
    val FLOAT = Value("floating-point number")
    val SPACE = Value("white space")
    /*delimited content*/
    val STRING = Value("string")
    val ALT_STRING = Value("back-quoted string")
    val VERBATIM = Value("verbatim text")
    val CARTOUCHE = Value("text cartouche")
    val INFORMAL_COMMENT = Value("informal comment")
    val FORMAL_COMMENT = Value("formal comment")
    /*special content*/
    val ERROR = Value("bad input")
    val UNPARSED = Value("unparsed input")
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
    43
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
    44
55494
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    45
  /* parsers */
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    46
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    47
  // Default instance of the token parsers, used by explode and explode_line.
  object Parsers extends Parsers
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    48
67439
78759a7bd874 more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents: 67432
diff changeset
    49
  trait Parsers extends Scan.Parsers with Comment.Parsers
55494
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    50
  {
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    51
    // Scanner for delimited tokens: strings, back-quoted strings, verbatim text,
    // cartouches, informal comments and formal comments (tried in that order).
    private def delimited_token: Parser[Token] =
    {
      val alternatives: List[Parser[Token]] =
        List(
          quoted("\"") ^^ (src => Token(Token.Kind.STRING, src)),
          quoted("`") ^^ (src => Token(Token.Kind.ALT_STRING, src)),
          verbatim ^^ (src => Token(Token.Kind.VERBATIM, src)),
          cartouche ^^ (src => Token(Token.Kind.CARTOUCHE, src)),
          comment ^^ (src => Token(Token.Kind.INFORMAL_COMMENT, src)),
          comment_cartouche ^^ (src => Token(Token.Kind.FORMAL_COMMENT, src)))

      // right-nested ordered choice: a | (b | (c | ...))
      alternatives.reduceRight((first, rest) => first | rest)
    }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
    62
58900
1435cc20b022 explicit type Keyword.Keywords;
wenzelm
parents: 58899
diff changeset
    63
    // Scanner for all non-delimited tokens: identifiers, variables, numbers,
    // symbolic identifiers, keywords/commands, white space, and error recovery.
    private def other_token(keywords: Keyword.Keywords): Parser[Token] =
    {
      // identifier: a letter, then letters/digits where a subscript symbol is
      // only accepted directly in front of further letters/digits
      val letters_digits = many1(Symbol.is_letdig)
      val subscript = one(sym => sym == Symbol.sub_decoded || sym == Symbol.sub)
      val name =
        one(Symbol.is_letter) ~
          (rep(letters_digits | (subscript ~ letters_digits ^^ { case a ~ b => a + b })) ^^
            (_.mkString)) ^^
        { case a ~ b => a + b }

      // numerals and identifiers with optional ".<digits>" suffix
      val digits = many1(Symbol.is_digit)
      val decimal = digits ~ "." ~ digits ^^ { case a ~ b ~ c => a + b + c }
      val indexed_name =
        name ~ opt("." ~ digits) ^^ {
          case a ~ Some(b ~ c) => a + b + c
          case a ~ None => a
        }

      // a single name is IDENT, several names joined by "." are LONG_IDENT
      val ident =
        name ~ rep("." ~> name) ^^ {
          case a ~ Nil => Token(Token.Kind.IDENT, a)
          case a ~ bs => Token(Token.Kind.LONG_IDENT, (a :: bs).mkString("."))
        }

      val schematic_var =
        "?" ~ indexed_name ^^ { case a ~ b => Token(Token.Kind.VAR, a + b) }
      val type_ident =
        "'" ~ name ^^ { case a ~ b => Token(Token.Kind.TYPE_IDENT, a + b) }
      val type_var =
        "?'" ~ indexed_name ^^ { case a ~ b => Token(Token.Kind.TYPE_VAR, a + b) }
      val natural = digits ^^ (src => Token(Token.Kind.NAT, src))
      val float =
        ("-" ~ decimal ^^ { case a ~ b => a + b } | decimal) ^^
          (src => Token(Token.Kind.FLOAT, src))

      val sym_ident =
        (many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^
        (src => Token(Token.Kind.SYM_IDENT, src))

      // minor keywords become KEYWORD, major keywords become COMMAND
      val keyword =
        literal(keywords.minor) ^^ (src => Token(Token.Kind.KEYWORD, src)) |||
        literal(keywords.major) ^^ (src => Token(Token.Kind.COMMAND, src))

      val blanks = many1(Symbol.is_blank) ^^ (src => Token(Token.Kind.SPACE, src))

      // malformed delimited content is turned into ERROR tokens
      val recover_delimited =
        (recover_quoted("\"") |
          (recover_quoted("`") |
            (recover_verbatim |
              (recover_cartouche | recover_comment)))) ^^ (src => Token(Token.Kind.ERROR, src))

      // last resort: any single symbol is an ERROR token
      val bad = one(_ => true) ^^ (src => Token(Token.Kind.ERROR, src))

      val proper =
        ident | (schematic_var | (type_ident | (type_var | (float | (natural | sym_ident)))))

      // `|||` is the longest-match alternative of the parser combinator library
      blanks | (recover_delimited | ((proper ||| keyword) | bad))
    }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   109
58900
1435cc20b022 explicit type Keyword.Keywords;
wenzelm
parents: 58899
diff changeset
   110
    // Scanner for a single token: delimited forms are tried first, then all
    // other token kinds relative to the given keywords.
    def token(keywords: Keyword.Keywords): Parser[Token] =
      delimited_token | other_token(keywords)
55494
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   112
58900
1435cc20b022 explicit type Keyword.Keywords;
wenzelm
parents: 58899
diff changeset
   113
    // Line-oriented scanner for a single token: starts from the given line
    // context and returns the token together with the resulting context.
    // Non-delimited tokens always end in context Scan.Finished.
    def token_line(keywords: Keyword.Keywords, ctxt: Scan.Line_Context)
      : Parser[(Token, Scan.Line_Context)] =
    {
      val string =
        quoted_line("\"", ctxt) ^^
          { case (src, next) => (Token(Token.Kind.STRING, src), next) }
      val alt_string =
        quoted_line("`", ctxt) ^^
          { case (src, next) => (Token(Token.Kind.ALT_STRING, src), next) }
      val verb =
        verbatim_line(ctxt) ^^
          { case (src, next) => (Token(Token.Kind.VERBATIM, src), next) }
      val cart =
        cartouche_line(ctxt) ^^
          { case (src, next) => (Token(Token.Kind.CARTOUCHE, src), next) }
      val cmt =
        comment_line(ctxt) ^^
          { case (src, next) => (Token(Token.Kind.INFORMAL_COMMENT, src), next) }
      val formal_cmt =
        comment_cartouche_line(ctxt) ^^
          { case (src, next) => (Token(Token.Kind.FORMAL_COMMENT, src), next) }
      val other = other_token(keywords) ^^ (tok => (tok, Scan.Finished))

      string | (alt_string | (verb | (cart | (cmt | (formal_cmt | other)))))
    }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   129
  }
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   130
009b71c1ed23 tuned signature (in accordance to ML version);
wenzelm
parents: 55492
diff changeset
   131
59083
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
   132
  /* explode */
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
   133
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
   134
  // Tokenize the complete input; raises an error if the scanner does not
  // succeed on all of it.
  def explode(keywords: Keyword.Keywords, inp: CharSequence): List[Token] =
  {
    val all_tokens = Parsers.rep(Parsers.token(keywords))
    val result = Parsers.parseAll(all_tokens, Scan.char_reader(inp))
    result match {
      case Parsers.Success(tokens, _) => tokens
      case _ => error("Unexpected failure of tokenizing input:\n" + inp.toString)
    }
  }
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
   139
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
   140
  // Tokenize one line of input, starting in the given line context; returns
  // the tokens together with the context reached at the end of the input.
  def explode_line(keywords: Keyword.Keywords, inp: CharSequence, context: Scan.Line_Context)
    : (List[Token], Scan.Line_Context) =
  {
    // tokens are accumulated in reverse order and flipped at the end
    @scala.annotation.tailrec
    def scan(in: input.Reader[Char], acc: List[Token], ctxt: Scan.Line_Context)
      : (List[Token], Scan.Line_Context) =
    {
      if (in.atEnd) (acc.reverse, ctxt)
      else
        Parsers.parse(Parsers.token_line(keywords, ctxt), in) match {
          case Parsers.Success((tok, next_ctxt), rest) => scan(rest, tok :: acc, next_ctxt)
          case Parsers.NoSuccess(_, rest) =>
            error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
        }
    }

    scan(Scan.char_reader(inp), Nil, context)
  }
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
   155
64671
93e375bd3283 clarified header text;
wenzelm
parents: 64471
diff changeset
   156
  // Token obtained by scanning a single "\n" with an empty keyword table;
  // .head relies on that scan producing at least one token.
  val newline: Token = explode(Keyword.Keywords.empty, "\n").head
93e375bd3283 clarified header text;
wenzelm
parents: 64471
diff changeset
   157
59083
88b0b1f28adc tuned signature;
wenzelm
parents: 59081
diff changeset
   158
69603
67ae2e164c0f support for isabelle update -u path_cartouches;
wenzelm
parents: 69551
diff changeset
   159
  /* embedded */
67ae2e164c0f support for isabelle update -u path_cartouches;
wenzelm
parents: 69551
diff changeset
   160
67ae2e164c0f support for isabelle update -u path_cartouches;
wenzelm
parents: 69551
diff changeset
   161
  // The input as a single token, provided it scans as exactly one token
  // that satisfies is_embedded; None otherwise.
  def read_embedded(keywords: Keyword.Keywords, inp: CharSequence): Option[Token] =
    Some(explode(keywords, inp)).collect { case List(tok) if tok.is_embedded => tok }
67ae2e164c0f support for isabelle update -u path_cartouches;
wenzelm
parents: 69551
diff changeset
   166
67ae2e164c0f support for isabelle update -u path_cartouches;
wenzelm
parents: 69551
diff changeset
   167
65523
4f2954adc217 more operations;
wenzelm
parents: 65335
diff changeset
   168
  /* names */
4f2954adc217 more operations;
wenzelm
parents: 65335
diff changeset
   169
4f2954adc217 more operations;
wenzelm
parents: 65335
diff changeset
   170
  // The input as a single token, provided it scans as exactly one token
  // that satisfies is_name; None otherwise.
  def read_name(keywords: Keyword.Keywords, inp: CharSequence): Option[Token] =
    Some(explode(keywords, inp)).collect { case List(tok) if tok.is_name => tok }
4f2954adc217 more operations;
wenzelm
parents: 65335
diff changeset
   175
4f2954adc217 more operations;
wenzelm
parents: 65335
diff changeset
   176
  // Return the name unchanged if it scans as a single name token; otherwise
  // wrap it in quotes, with embedded double quotes escaped by a backslash.
  def quote_name(keywords: Keyword.Keywords, name: String): String =
    read_name(keywords, name) match {
      case Some(_) => name
      case None => quote(name.replace("\"", "\\\""))
    }
4f2954adc217 more operations;
wenzelm
parents: 65335
diff changeset
   179
4f2954adc217 more operations;
wenzelm
parents: 65335
diff changeset
   180
67132
336831647779 added action to make antiquoted cartouche;
wenzelm
parents: 66915
diff changeset
   181
  /* plain antiquotation (0 or 1 args) */
336831647779 added action to make antiquoted cartouche;
wenzelm
parents: 66915
diff changeset
   182
336831647779 added action to make antiquoted cartouche;
wenzelm
parents: 66915
diff changeset
   183
  // Read a plain antiquotation: a name token, optionally followed by one
  // embedded argument. Tokens that are not is_proper are ignored. Returns
  // the name content and the optional argument content, or None if the
  // input has any other shape.
  def read_antiq_arg(keywords: Keyword.Keywords, inp: CharSequence): Option[(String, Option[String])] =
    explode(keywords, inp).filter(_.is_proper) match {
      // explicit tuples: do not rely on argument adaptation (auto-tupling) in Some(...)
      case List(t) if t.is_name => Some((t.content, None))
      case List(t1, t2) if t1.is_name && t2.is_embedded => Some((t1.content, Some(t2.content)))
      case _ => None
    }
336831647779 added action to make antiquoted cartouche;
wenzelm
parents: 66915
diff changeset
   189
336831647779 added action to make antiquoted cartouche;
wenzelm
parents: 66915
diff changeset
   190
59735
24bee1b11fce misc tuning and simplification;
wenzelm
parents: 59715
diff changeset
   191
  /* implode */
24bee1b11fce misc tuning and simplification;
wenzelm
parents: 59715
diff changeset
   192
24bee1b11fce misc tuning and simplification;
wenzelm
parents: 59715
diff changeset
   193
  // Concatenate the source text of all tokens; a singleton list yields the
  // source of its only token directly.
  def implode(toks: List[Token]): String =
    toks match {
      case single :: Nil => single.source
      case _ => toks.iterator.map(_.source).mkString
    }
24bee1b11fce misc tuning and simplification;
wenzelm
parents: 59715
diff changeset
   198
24bee1b11fce misc tuning and simplification;
wenzelm
parents: 59715
diff changeset
   199
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
   200
  /* token reader */
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   201
56464
555f4be59be6 more precise token positions;
wenzelm
parents: 55512
diff changeset
   202
  // Predefined token positions and constructors for file- or id-based positions.
  object Pos
  {
    val none: Pos = new Pos(line = 0, offset = 0, file = "", id = "")
    val start: Pos = new Pos(line = 1, offset = 1, file = "", id = "")

    // start position within the given file
    def file(file: String): Pos = new Pos(line = 1, offset = 1, file = file, id = "")

    // position relative to the given id: offset 1, without line information
    def id(id: String): Pos = new Pos(line = 0, offset = 1, file = "", id = id)

    val command: Pos = id(Markup.COMMAND)
  }
555f4be59be6 more precise token positions;
wenzelm
parents: 55512
diff changeset
   210
59671
9715eb8e9408 more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents: 59122
diff changeset
   211
  final class Pos private[Token](
59696
f505fee04400 tuned signature;
wenzelm
parents: 59695
diff changeset
   212
      val line: Int,
f505fee04400 tuned signature;
wenzelm
parents: 59695
diff changeset
   213
      val offset: Symbol.Offset,
59706
bf6ca55aae13 proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents: 59705
diff changeset
   214
      val file: String,
bf6ca55aae13 proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents: 59705
diff changeset
   215
      val id: String)
64824
330ec9bc4b75 tuned signature;
wenzelm
parents: 64728
diff changeset
   216
    extends input.Position
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   217
  {
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
   218
    // Column information is not tracked: always 0.
    def column: Int = 0
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
   219
    // Line contents are not tracked: always the empty string.
    def lineContents: String = ""
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
   220
67895
cd00999d2d30 more position information;
wenzelm
parents: 67446
diff changeset
   221
    // Position after the given token, i.e. advanced by the token's source text.
    def advance(token: Token): Pos = advance(token.source)
cd00999d2d30 more position information;
wenzelm
parents: 67446
diff changeset
   222
    // Position after the given source text: the offset grows by one per symbol
    // and the line by one per newline symbol. A line or offset that is not
    // positive stays unchanged; if nothing changes, this object is returned.
    def advance(source: String): Pos =
    {
      val (line1, offset1) =
        Symbol.iterator(source).foldLeft((line, offset)) {
          case ((l, o), s) =>
            val l1 = if (l > 0 && Symbol.is_newline(s)) l + 1 else l
            val o1 = if (o > 0) o + 1 else o
            (l1, o1)
        }
      if (line1 == line && offset1 == offset) this
      else new Pos(line1, offset1, file, id)
    }
56464
555f4be59be6 more precise token positions;
wenzelm
parents: 55512
diff changeset
   233
59695
a03e0561bdbf clarified positions of theory imports;
wenzelm
parents: 59671
diff changeset
   234
    // Position properties for this position and the given end offset.  Each
    // property is included only if its value is positive (line, offset,
    // end_offset) or non-empty (file, id); the order of properties is
    // line, offset, end_offset, file, id.
    private def position(end_offset: Symbol.Offset): Position.T =
    {
      val line_props = if (line > 0) Position.Line(line) else Nil
      val offset_props = if (offset > 0) Position.Offset(offset) else Nil
      val end_offset_props = if (end_offset > 0) Position.End_Offset(end_offset) else Nil
      val file_props = if (file != "") Position.File(file) else Nil
      val id_props = if (id != "") Position.Id_String(id) else Nil
      line_props ::: offset_props ::: end_offset_props ::: file_props ::: id_props
    }
59671
9715eb8e9408 more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents: 59122
diff changeset
   240
9715eb8e9408 more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents: 59122
diff changeset
   241
    // Position properties without end offset: the argument 0 is not positive
    // and thus omitted by position(end_offset).
    def position(): Position.T = position(0)
9715eb8e9408 more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents: 59122
diff changeset
   242
    // Position properties with end offset taken from advancing over the given token.
    def position(token: Token): Position.T = position(advance(token).offset)
67895
cd00999d2d30 more position information;
wenzelm
parents: 67446
diff changeset
   243
    // Position properties with end offset taken from advancing over the given source text.
    def position(source: String): Position.T = position(advance(source).offset)
59671
9715eb8e9408 more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents: 59122
diff changeset
   244
64728
601866c61ded more precise markup;
wenzelm
parents: 64671
diff changeset
   245
    // Printable form: Position.here applied to position() (no end offset), not delimited.
    override def toString: String = Position.here(position(), delimited = false)
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   246
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   247
64824
330ec9bc4b75 tuned signature;
wenzelm
parents: 64728
diff changeset
   248
  // Abstract reader over tokens, as instance of input.Reader from scala.util.parsing.
  abstract class Reader extends input.Reader[Token]
34157
0a0a19153626 explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents: 34143
diff changeset
   249
56464
555f4be59be6 more precise token positions;
wenzelm
parents: 55512
diff changeset
   250
  // Reader over a fixed list of tokens; pos is the position of the first
  // remaining token.
  private class Token_Reader(tokens: List[Token], val pos: Pos) extends Reader
  {
    // First remaining token.  NB: fails on exhausted input, so atEnd needs to
    // be checked beforehand.
    def first: Token = tokens.head

    // Remaining input after the first token, with the position advanced over
    // its source.  On exhausted input the reader itself is returned, as
    // specified for input.Reader.rest, instead of failing on the empty list.
    def rest: Token_Reader =
      tokens match {
        case tok :: toks => new Token_Reader(toks, pos.advance(tok))
        case Nil => this
      }

    // No tokens left.
    def atEnd: Boolean = tokens.isEmpty
  }
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   256
59705
740a0ca7e09b clarified span position;
wenzelm
parents: 59701
diff changeset
   257
  // Reader over the given tokens, starting at the given position.
  def reader(tokens: List[Token], start: Token.Pos): Reader = new Token_Reader(tokens, start)
65335
7634d33c1a79 support to encode/decode command state;
wenzelm
parents: 64824
diff changeset
   259
7634d33c1a79 support to encode/decode command state;
wenzelm
parents: 64824
diff changeset
   260
7634d33c1a79 support to encode/decode command state;
wenzelm
parents: 64824
diff changeset
   261
  /* XML data representation */
7634d33c1a79 support to encode/decode command state;
wenzelm
parents: 64824
diff changeset
   262
7634d33c1a79 support to encode/decode command state;
wenzelm
parents: 64824
diff changeset
   263
  // Encode a token as XML pair of the integer id of its kind and its source text.
  val encode: XML.Encode.T[Token] = (tok: Token) =>
  {
    import XML.Encode._
    val encode_pair = pair(int, string)
    encode_pair((tok.kind.id, tok.source))
  }
7634d33c1a79 support to encode/decode command state;
wenzelm
parents: 64824
diff changeset
   268
7634d33c1a79 support to encode/decode command state;
wenzelm
parents: 64824
diff changeset
   269
  // Decode a token from an XML pair of integer kind id and source text; the
  // kind is looked up by its id via Kind(...).
  val decode: XML.Decode.T[Token] = (body: XML.Body) =>
  {
    import XML.Decode._
    pair(int, string)(body) match {
      case (kind_id, source) => Token(Kind(kind_id), source)
    }
  }
34139
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   275
}
d1ded303fe0e Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff changeset
   276
36956
21be4832c362 renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents: 34311
diff changeset
   277
60215
5fb4990dfc73 misc tuning, based on warnings by IntelliJ IDEA;
wenzelm
parents: 60133
diff changeset
   278
// Token of the outer syntax: its kind together with its literal source text.
sealed case class Token(kind: Token.Kind.Value, source: String)
{
  /* commands and keywords */

  def is_command: Boolean = kind == Token.Kind.COMMAND
  def is_command(name: String): Boolean = is_command && source == name
  def is_keyword: Boolean = kind == Token.Kind.KEYWORD
  def is_keyword(name: String): Boolean = is_keyword && source == name
  // keyword that consists of exactly the given character
  def is_keyword(name: Char): Boolean =
    is_keyword && source.length == 1 && source.charAt(0) == name
  // keyword that is not an ASCII identifier
  def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source)


  /* names and text */

  def is_ident: Boolean = kind == Token.Kind.IDENT
  def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT
  def is_string: Boolean = kind == Token.Kind.STRING
  def is_nat: Boolean = kind == Token.Kind.NAT
  def is_float: Boolean = kind == Token.Kind.FLOAT

  // identifier (plain, long, symbolic), string, or natural number
  def is_name: Boolean =
    kind match {
      case Token.Kind.IDENT | Token.Kind.LONG_IDENT | Token.Kind.SYM_IDENT |
        Token.Kind.STRING | Token.Kind.NAT => true
      case _ => false
    }

  // name, cartouche, variable, type identifier, or type variable
  def is_embedded: Boolean =
    is_name ||
    (kind match {
      case Token.Kind.CARTOUCHE | Token.Kind.VAR | Token.Kind.TYPE_IDENT |
        Token.Kind.TYPE_VAR => true
      case _ => false
    })

  // embedded content or verbatim text
  def is_text: Boolean = is_embedded || kind == Token.Kind.VERBATIM


  /* white space and comments */

  def is_space: Boolean = kind == Token.Kind.SPACE
  def is_informal_comment: Boolean = kind == Token.Kind.INFORMAL_COMMENT
  def is_formal_comment: Boolean = kind == Token.Kind.FORMAL_COMMENT

  // formal comment whose source starts with the marker symbol (encoded or decoded)
  def is_marker: Boolean =
    is_formal_comment &&
    (source.startsWith(Symbol.marker) || source.startsWith(Symbol.marker_decoded))

  def is_comment: Boolean = is_informal_comment || is_formal_comment
  // NB: formal comments are not ignored
  def is_ignored: Boolean = is_space || is_informal_comment
  def is_proper: Boolean = !(is_space || is_comment)


  /* errors */

  def is_error: Boolean = kind == Token.Kind.ERROR
  def is_unparsed: Boolean = kind == Token.Kind.UNPARSED

  // error token whose source starts with an opening delimiter of a string,
  // alt-string, verbatim text, comment, or cartouche (encoded or decoded)
  def is_unfinished: Boolean =
  {
    def starts(prefix: String): Boolean = source.startsWith(prefix)
    is_error &&
      (starts("\"") || starts("`") || starts("{*") || starts("(*") ||
        starts(Symbol.open) || starts(Symbol.open_decoded))
  }


  /* brackets and blocks */

  def is_open_bracket: Boolean = is_keyword && Word.open_brackets.exists(c => is_keyword(c))
  def is_close_bracket: Boolean = is_keyword && Word.close_brackets.exists(c => is_keyword(c))

  // NB: "begin" is a keyword, but "end" is a command
  def is_begin: Boolean = is_keyword("begin")
  def is_end: Boolean = is_command("end")
  def is_begin_or_command: Boolean = is_begin || is_command


  /* source and content */

  // length of the source, counted in symbols
  def symbol_length: Symbol.Offset = Symbol.iterator(source).length

  // Content of the token according to its kind: quoted, verbatim, cartouche
  // and comment tokens are passed through the corresponding content
  // operation; the source of any other kind is returned unchanged.
  def content: String =
    kind match {
      case Token.Kind.STRING => Scan.Parsers.quoted_content("\"", source)
      case Token.Kind.ALT_STRING => Scan.Parsers.quoted_content("`", source)
      case Token.Kind.VERBATIM => Scan.Parsers.verbatim_content(source)
      case Token.Kind.CARTOUCHE => Scan.Parsers.cartouche_content(source)
      case Token.Kind.INFORMAL_COMMENT => Scan.Parsers.comment_content(source)
      case Token.Kind.FORMAL_COMMENT => Comment.content(source)
      case _ => source
    }

  // Name token whose content is a wellformed path, contains no ASCII blanks,
  // and is not reserved according to Path.is_reserved.
  def is_system_name: Boolean =
  {
    // NB: content is determined first, before any of the tests
    val text = content
    def no_blanks: Boolean = !text.exists(Symbol.is_ascii_blank)
    is_name && Path.is_wellformed(text) && no_blanks && !Path.is_reserved(text)
  }
}