src/Pure/Isar/outer_syntax.scala
author wenzelm
Sun Oct 19 11:20:03 2014 +0200 (2014-10-19)
changeset 58706 70a947611792
parent 58703 883efcc7a50d
child 58743 c07a59140fee
permissions -rw-r--r--
tuned signature and modules;
wenzelm@34166
     1
/*  Title:      Pure/Isar/outer_syntax.scala
wenzelm@34166
     2
    Author:     Makarius
wenzelm@34166
     3
wenzelm@34166
     4
Isabelle/Isar outer syntax.
wenzelm@34166
     5
*/
wenzelm@34166
     6
wenzelm@34166
     7
package isabelle
wenzelm@34166
     8
wenzelm@34166
     9
wenzelm@34166
    10
import scala.util.parsing.input.{Reader, CharSequenceReader}
wenzelm@43411
    11
import scala.collection.mutable
wenzelm@58706
    12
import scala.annotation.tailrec
wenzelm@34166
    13
wenzelm@34166
    14
wenzelm@43774
    15
object Outer_Syntax
wenzelm@43774
    16
{
wenzelm@58706
    17
  /* syntax */
wenzelm@58706
    18
wenzelm@58706
    19
  /* syntax built from the constructor defaults only: no keywords, empty lexicon,
     Completion.empty, the outer language context, and has_tokens = true */
  val empty: Outer_Syntax = new Outer_Syntax()
wenzelm@58706
    20
wenzelm@58706
    21
  /* like the constructor defaults, except that completion is taken from
     Completion.init() instead of Completion.empty */
  def init(): Outer_Syntax = new Outer_Syntax(completion = Completion.init())
wenzelm@58706
    22
wenzelm@58706
    23
wenzelm@58706
    24
  /* string literals */
wenzelm@58706
    25
wenzelm@43774
    26
  /* Wrap str in double quotes.  Single-character symbols that are a backslash,
     a double quote, or below code 32 (other than YXML.X and YXML.Y) are emitted
     as a backslash followed by the zero-padded three-digit decimal code;
     everything else is copied verbatim. */
  def quote_string(str: String): String =
  {
    def needs_escape(c: Char): Boolean =
      c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"'

    // backslash plus decimal character code, left-padded with '0' to three digits
    def escape(c: Char): String =
    {
      val code = c.toInt.toString
      "\\" + ("0" * (3 - code.length)) + code
    }

    val buf = new StringBuilder(str.length + 10)
    buf += '"'
    Symbol.iterator(str).foreach(sym =>
      if (sym.length == 1 && needs_escape(sym(0))) buf ++= escape(sym(0))
      else buf ++= sym)
    buf += '"'
    buf.toString
  }
wenzelm@46626
    46
wenzelm@58696
    47
wenzelm@58697
    48
  /* line-oriented structure */
wenzelm@58696
    49
wenzelm@58697
    50
  /* Summary of one line of source, as produced by line_structure:
       improper          all tokens of the line are improper
       command           the line contains a command token
       depth             depth assigned to the line itself
       span_depth        running depth after the tokens of the line
       after_span_depth  running depth for after the current span */
  sealed case class Line_Structure(
    improper: Boolean = true,
    command: Boolean = false,
    depth: Int = 0,
    span_depth: Int = 0,
    after_span_depth: Int = 0)

  object Line_Structure
  {
    // initial structure: all fields at their default values
    val init: Line_Structure = new Line_Structure()
  }
wenzelm@58706
    61
wenzelm@58706
    62
wenzelm@58706
    63
  /* overall document structure */
wenzelm@58706
    64
wenzelm@58706
    65
  /* Document tree: named blocks containing sub-documents, with single
     commands as leaves.  Every node reports a length. */
  sealed abstract class Document
  {
    def length: Int
  }

  /* inner node: length is the sum of the lengths of all body elements,
     computed once at construction */
  case class Document_Block(name: String, body: List[Document]) extends Document
  {
    val length: Int = body.foldLeft(0)((sum, doc) => sum + doc.length)
  }

  /* leaf node: length is that of the wrapped command */
  case class Document_Atom(command: Command) extends Document
  {
    def length: Int = command.length
  }
wenzelm@43774
    74
}
wenzelm@43774
    75
wenzelm@46712
    76
final class Outer_Syntax private(
wenzelm@48864
    77
  keywords: Map[String, (String, List[String])] = Map.empty,
wenzelm@46626
    78
  lexicon: Scan.Lexicon = Scan.Lexicon.empty,
wenzelm@53280
    79
  val completion: Completion = Completion.empty,
wenzelm@55749
    80
  val language_context: Completion.Language_Context = Completion.Language_Context.outer,
wenzelm@56393
    81
  val has_tokens: Boolean = true) extends Prover.Syntax
wenzelm@34166
    82
{
wenzelm@58706
    83
  /** syntax content **/
wenzelm@58706
    84
wenzelm@48660
    85
  /* Render the keyword table as a sorted "keywords ... and ..." listing:
     minor keywords as the quoted name only, all others as name :: kind,
     followed by the quoted file list in parentheses when it is non-empty. */
  override def toString: String =
  {
    val entries =
      keywords.map({ case (name, (kind, files)) =>
        if (kind == Keyword.MINOR) quote(name)
        else {
          val suffix = if (files.isEmpty) "" else " (" + commas_quote(files) + ")"
          quote(name) + " :: " + quote(kind) + suffix
        }
      })
    entries.toList.sorted.mkString("keywords\n  ", " and\n  ", "")
  }
wenzelm@48660
    92
wenzelm@58695
    93
wenzelm@58695
    94
  /* keyword kind */
wenzelm@58695
    95
wenzelm@48864
    96
  /* raw lookup in the keyword table: the (kind, files) entry for name, if any */
  def keyword_kind_files(name: String): Option[(String, List[String])] = keywords.get(name)
wenzelm@48864
    97
  /* kind component of the keyword table entry for name, if any */
  def keyword_kind(name: String): Option[String] = keyword_kind_files(name).map(_._1)
wenzelm@38471
    98
wenzelm@58695
    99
  /* name is a command iff it is a declared keyword whose kind is not Keyword.MINOR */
  def is_command(name: String): Boolean =
    keyword_kind(name).exists(kind => kind != Keyword.MINOR)
wenzelm@58695
   104
wenzelm@58696
   105
  /* true iff token is a command token, its source is a declared non-minor
     keyword, and pred holds for the kind of that keyword */
  def command_kind(token: Token, pred: String => Boolean): Boolean =
    token.is_command &&
      (keyword_kind(token.source) match {
        case Some(kind) if kind != Keyword.MINOR => pred(kind)
        case _ => false
      })
wenzelm@58696
   108
wenzelm@58695
   109
wenzelm@58695
   110
  /* load commands */
wenzelm@58695
   111
wenzelm@57901
   112
  /* the list stored alongside the kind for name, provided name is a keyword
     of kind Keyword.THY_LOAD; None otherwise */
  def load_command(name: String): Option[List[String]] =
    keywords.get(name).collect({ case (Keyword.THY_LOAD, exts) => exts })
wenzelm@54462
   117
wenzelm@56314
   118
  /* all keywords of kind Keyword.THY_LOAD, paired with their file lists */
  val load_commands: List[(String, List[String])] =
    keywords.iterator.collect({
      case (name, (Keyword.THY_LOAD, files)) => (name, files)
    }).toList
wenzelm@48872
   120
wenzelm@56393
   121
  /* purely textual check: does text contain the name of some load command
     as a substring? */
  def load_commands_in(text: String): Boolean =
    load_commands.exists(entry => text.containsSlice(entry._1))
wenzelm@56393
   123
wenzelm@58695
   124
wenzelm@58695
   125
  /* add keywords */
wenzelm@58695
   126
wenzelm@50128
   127
  /* Add keyword name with the given (kind, files) entry, producing a new syntax.
     The name is entered into the keyword table and the lexicon.  Completion is
     extended with name (replaced by the content of replace, or by name itself
     if replace is None), unless the kind satisfies Keyword.control or replace
     is Some("").  The result always has has_tokens = true. */
  def + (name: String, kind: (String, List[String]), replace: Option[String]): Outer_Syntax =
  {
    val (kind_name, _) = kind
    val suppress_completion = Keyword.control(kind_name) || replace.exists(_.isEmpty)
    new Outer_Syntax(
      keywords + (name -> kind),
      lexicon + name,
      if (suppress_completion) completion else completion + (name, replace getOrElse name),
      language_context,
      has_tokens = true)
  }
wenzelm@34166
   136
wenzelm@53280
   137
  /* convenience variants, all delegating to the three-argument version */

  // explicit (kind, files) entry; completion replacement is the name itself
  def + (name: String, kind: (String, List[String])): Outer_Syntax =
    this.+(name, kind, Some(name))

  // kind without files; completion replacement is the name itself
  def + (name: String, kind: String): Outer_Syntax =
    this.+(name, (kind, Nil), Some(name))

  // minor keyword with explicit completion replacement
  def + (name: String, replace: Option[String]): Outer_Syntax =
    this.+(name, (Keyword.MINOR, Nil), replace)

  // minor keyword without explicit completion replacement
  def + (name: String): Outer_Syntax =
    this.+(name, None)
wenzelm@48706
   144
wenzelm@48873
   145
  /* Add all keyword declarations in turn.  Each name is entered twice: once as
     Symbol.decode(name) and once as Symbol.encode(name).  Declarations with a
     kind specification use that kind; those without become minor keywords. */
  def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax =
    keywords.foldLeft(this)((syntax, decl) => {
      val (name, spec, replace) = decl
      spec match {
        case Some((kind, _)) =>
          syntax + (Symbol.decode(name), kind, replace) + (Symbol.encode(name), kind, replace)
        case None =>
          syntax + (Symbol.decode(name), replace) + (Symbol.encode(name), replace)
      }
    })
wenzelm@34166
   156
wenzelm@58695
   157
wenzelm@58706
   158
  /* language context */
wenzelm@34166
   159
wenzelm@58706
   160
  /* copy of this syntax with only the language context replaced */
  def set_language_context(context: Completion.Language_Context): Outer_Syntax =
    new Outer_Syntax(
      keywords = keywords,
      lexicon = lexicon,
      completion = completion,
      language_context = context,
      has_tokens = has_tokens)
wenzelm@58706
   162
wenzelm@58706
   163
  /* Variant of this syntax with has_tokens = false, keeping completion and
     language context.  Only admissible while no keywords have been added:
     fails via require otherwise. */
  def no_tokens: Outer_Syntax =
  {
    val pristine = keywords.isEmpty && lexicon.isEmpty
    require(pristine)
    new Outer_Syntax(
      completion = completion, language_context = language_context, has_tokens = false)
  }
wenzelm@40454
   171
wenzelm@58706
   172
wenzelm@40454
   173
wenzelm@58706
   174
  /** parsing **/
wenzelm@34166
   175
wenzelm@58697
   176
  /* line-oriented structure */
wenzelm@58696
   177
wenzelm@58700
   178
  /* Compute the structure of a line from its tokens and the structure struct
     of the preceding line.

     The depth of the line itself is 0 if it contains a theory command,
     otherwise the previous after_span_depth if it contains any command,
     otherwise the previous span_depth.

     The pair (span_depth, after_span_depth) is threaded through the tokens
     from left to right; only command tokens change it. */
  def line_structure(tokens: List[Token], struct: Outer_Syntax.Line_Structure)
    : Outer_Syntax.Line_Structure =
  {
    val all_improper = tokens.forall(_.is_improper)
    val has_command = tokens.exists(_.is_command)

    val line_depth =
      if (tokens.exists(command_kind(_, Keyword.theory))) 0
      else if (has_command) struct.after_span_depth
      else struct.span_depth

    // one step of the depth computation; depths = (span_depth, after_span_depth)
    def step(depths: (Int, Int), tok: Token): (Int, Int) =
    {
      val (_, after) = depths
      if (!tok.is_command) depths
      else if (command_kind(tok, Keyword.theory_goal)) (2, 1)
      else if (command_kind(tok, Keyword.theory)) (1, 0)
      else if (command_kind(tok, Keyword.proof_goal) || tok.source == "{") (after + 2, after + 1)
      else if (command_kind(tok, Keyword.qed) || tok.source == "}") (after + 1, after - 1)
      else if (command_kind(tok, Keyword.qed_global)) (1, 0)
      else depths
    }

    val (final_span_depth, final_after_span_depth) =
      tokens.foldLeft((struct.span_depth, struct.after_span_depth))(step)

    Outer_Syntax.Line_Structure(
      all_improper, has_command, line_depth, final_span_depth, final_after_span_depth)
  }
wenzelm@58696
   205
wenzelm@58696
   206
wenzelm@53280
   207
  /* token language */
wenzelm@53280
   208
wenzelm@57907
   209
  /* Tokenize the complete input with respect to the lexicon of this syntax.
     The token parser is expected to consume everything; any other parse
     result is reported via error. */
  def scan(input: CharSequence): List[Token] =
  {
    val tokenizer = Token.Parsers.rep(Token.Parsers.token(lexicon, is_command))
    val reader: Reader[Char] = new CharSequenceReader(input)
    Token.Parsers.parseAll(tokenizer, reader) match {
      case Token.Parsers.Success(tokens, _) => tokens
      case _ => error("Unexpected failure of tokenizing input:\n" + input.toString)
    }
  }
wenzelm@34166
   218
wenzelm@58697
   219
  /* Tokenize a single line, starting in the given scanner context.  Returns the
     tokens of the line, the scanner context reached at its end, and the line
     structure derived from the tokens and the given previous structure.
     A failing token parser is reported via error. */
  def scan_line(
    input: CharSequence,
    context: Scan.Line_Context,
    structure: Outer_Syntax.Line_Structure)
    : (List[Token], Scan.Line_Context, Outer_Syntax.Line_Structure) =
  {
    val collected = new mutable.ListBuffer[Token]

    // consume one token at a time until the reader is exhausted;
    // the result is the context after the last token
    @tailrec def consume(in: Reader[Char], ctxt: Scan.Line_Context): Scan.Line_Context =
      if (in.atEnd) ctxt
      else
        Token.Parsers.parse(Token.Parsers.token_line(lexicon, is_command, ctxt), in) match {
          case Token.Parsers.Success((tok, next_ctxt), rest) =>
            collected += tok
            consume(rest, next_ctxt)
          case Token.Parsers.NoSuccess(_, rest) =>
            error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
        }

    val final_context = consume(new CharSequenceReader(input), context)
    val tokens = collected.toList
    (tokens, final_context, line_structure(tokens, structure))
  }
wenzelm@55616
   238
wenzelm@55616
   239
wenzelm@58706
   240
  /* command spans */
wenzelm@57905
   241
wenzelm@57905
   242
  /* Partition a token list into command spans.  Each command token starts a new
     span.  Improper tokens are held back: they join the current span if a
     further non-command proper token follows, and otherwise form a span of
     their own. */
  def parse_spans(toks: List[Token]): List[Command_Span.Span] =
  {
    val result = new mutable.ListBuffer[Command_Span.Span]
    val content = new mutable.ListBuffer[Token]
    val improper = new mutable.ListBuffer[Token]   // improper tokens not yet assigned

    // classify a finished span and append it to the result
    def ship(span: List[Token]): Unit =
    {
      val well_formed_command =
        span.headOption.exists(_.is_command) && !span.exists(_.is_error)
      val kind =
        if (well_formed_command) {
          val name = span.head.source
          val pos = Position.Range(Text.Range(0, Symbol.iterator(name).length) + 1)
          Command_Span.Command_Span(name, pos)
        }
        else if (span.forall(_.is_improper)) Command_Span.Ignored_Span
        else Command_Span.Malformed_Span
      result += Command_Span.Span(kind, span)
    }

    // emit pending content first, then pending improper tokens
    def flush(): Unit =
      for (buf <- List(content, improper) if buf.nonEmpty) {
        ship(buf.toList)
        buf.clear()
      }

    toks.foreach(tok =>
      if (tok.is_command) { flush(); content += tok }
      else if (tok.is_improper) improper += tok
      else { content ++= improper; improper.clear(); content += tok })
    flush()

    result.toList
  }
wenzelm@57905
   276
wenzelm@57906
   277
  /* tokenize the input via scan, then partition the tokens into command spans */
  def parse_spans(input: CharSequence): List[Command_Span.Span] =
  {
    val tokens = scan(input)
    parse_spans(tokens)
  }
wenzelm@57906
   279
wenzelm@57905
   280
wenzelm@58706
   281
  /* overall document structure */
wenzelm@55616
   282
wenzelm@58706
   283
  /* Sectioning level of a command, if it counts as a heading: 0 for the command
     named "header", 1 to 4 for the corresponding heading kinds, 5 for any other
     command whose kind satisfies Keyword.theory, None otherwise. */
  def heading_level(command: Command): Option[Int] =
    if (command.name == "header") Some(0)
    else
      keyword_kind(command.name).flatMap({
        case Keyword.THY_HEADING1 => Some(1)
        case Keyword.THY_HEADING2 | Keyword.PRF_HEADING2 => Some(2)
        case Keyword.THY_HEADING3 | Keyword.PRF_HEADING3 => Some(3)
        case Keyword.THY_HEADING4 | Keyword.PRF_HEADING4 => Some(4)
        case kind if Keyword.theory(kind) => Some(5)
        case _ => None
      })
wenzelm@58706
   295
wenzelm@58706
   296
  /* Parse text into a tree of nested blocks according to heading levels.
     The root block is named after node_name.  A command with heading level i
     first closes all open blocks of level greater than i, then opens a new
     block of level i + 1 named by its source.  Every command, heading or not,
     becomes an atom of the innermost open block. */
  def parse_document(node_name: Document.Node.Name, text: CharSequence): Outer_Syntax.Document =
  {
    /* stack operations */

    def buffer(): mutable.ListBuffer[Outer_Syntax.Document] =
      new mutable.ListBuffer[Outer_Syntax.Document]

    // open blocks, innermost first: (level, name, body collected so far)
    var stack: List[(Int, String, mutable.ListBuffer[Outer_Syntax.Document])] =
      List((0, node_name.toString, buffer()))

    // fold the innermost block into its parent while its level satisfies
    // the predicate; the outermost block is never closed here
    @tailrec def close(level: Int => Boolean): Unit =
      stack match {
        case (lev, name, body) :: (enclosing @ ((_, _, parent) :: _)) if level(lev) =>
          parent += Outer_Syntax.Document_Block(name, body.toList)
          stack = enclosing
          close(level)
        case _ =>
      }

    def result(): Outer_Syntax.Document =
    {
      close(_ => true)
      val (_, name, body) = stack.head
      Outer_Syntax.Document_Block(name, body.toList)
    }

    def add(command: Command): Unit =
    {
      heading_level(command).foreach(i => {
        close(_ > i)
        stack = (i + 1, command.source, buffer()) :: stack
      })
      val (_, _, current) = stack.head
      current += Outer_Syntax.Document_Atom(command)
    }


    /* result structure */

    for (span <- parse_spans(text))
      add(Command(Document_ID.none, node_name, Nil, span))
    result()
  }
wenzelm@34166
   342
}