src/Pure/Isar/outer_syntax.scala
author wenzelm
Thu, 15 Mar 2012 10:16:21 +0100
changeset 46940 a40be2f10ca9
parent 46712 8650d9a95736
child 46941 c0f776b661fa
permissions -rw-r--r--
explicit Outer_Syntax.Decl;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
34166
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/Isar/outer_syntax.scala
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
     3
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
     4
Isabelle/Isar outer syntax.
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
     5
*/
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
     6
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
     7
package isabelle
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
     8
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
     9
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
    10
import scala.util.parsing.input.{Reader, CharSequenceReader}
43411
0206466ee473 some support for partial scans with explicit context;
wenzelm
parents: 40533
diff changeset
    11
import scala.collection.mutable
34166
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
    12
446a33b874b3 renamed class Outer_Keyword to Outer_Syntax;
wenzelm
parents:
diff changeset
    13
43774
6dfdb70496fe added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents: 43695
diff changeset
    14
object Outer_Syntax
{
  /* Quote str as a string literal.

     The result is wrapped in double quotes.  A one-character symbol is
     written as a backslash followed by its three-digit decimal code when it
     is a backslash, a double quote, or has a code below 32 other than YXML.X
     and YXML.Y.  All other symbols, including those longer than one
     character, are copied unchanged. */
  def quote_string(str: String): String =
  {
    // capacity hint: the two quotes plus some room for escapes
    val buf = new StringBuilder(str.length + 10)
    buf += '"'
    Symbol.iterator(str).foreach { sym =>
      if (sym.length != 1) buf ++= sym
      else {
        val ch = sym(0)
        val needs_escape =
          ch == '\\' || ch == '"' || (ch < 32 && ch != YXML.X && ch != YXML.Y)
        if (needs_escape) {
          // every escaped code is below 100, so pad with zeros to three digits
          buf += '\\'
          if (ch < 10) buf += '0'
          if (ch < 100) buf += '0'
          buf ++= ch.toInt.toString
        }
        else buf += ch
      }
    }
    buf += '"'
    buf.toString
  }

  // Keyword declaration: a name with an optional (kind, list of strings) pair.
  // NOTE(review): the meaning of the string list is not visible in this file.
  type Decl = (String, Option[(String, List[String])])

  // Initial syntax, built from the default arguments of the private constructor.
  def init(): Outer_Syntax = new Outer_Syntax()
}
6dfdb70496fe added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents: 43695
diff changeset
    40
46712
8650d9a95736 prefer final ADTs -- prevent oddities;
wenzelm
parents: 46626
diff changeset
    41
final class Outer_Syntax private(
  keywords: Map[String, String] = Map((";" -> Keyword.DIAG)),
  lexicon: Scan.Lexicon = Scan.Lexicon.empty,
  val completion: Completion = Completion.init())
{
  /* keyword table */

  // Kind recorded for the given keyword name, if any.
  def keyword_kind(name: String): Option[String] = keywords.get(name)

  // New syntax with name registered under kind: the keyword table and the
  // lexicon are both extended; the completion is extended with the given
  // replacement unless Keyword.control holds for kind.
  def + (name: String, kind: String, replace: String): Outer_Syntax =
  {
    val keywords1 = keywords + (name -> kind)
    val lexicon1 = lexicon + name
    val completion1 =
      if (Keyword.control(kind)) completion
      else completion + (name, replace)
    new Outer_Syntax(keywords1, lexicon1, completion1)
  }

  // As above, with the name itself as completion replacement.
  def + (name: String, kind: String): Outer_Syntax = this + (name, kind, name)

  // As above, with kind Keyword.MINOR.
  def + (name: String): Outer_Syntax = this + (name, Keyword.MINOR)

  // Add a declaration: a present kind is used as given (the accompanying
  // list is ignored here), an absent kind falls back to the name-only variant.
  def + (decl: Outer_Syntax.Decl): Outer_Syntax =
  {
    val (name, spec) = decl
    spec match {
      case Some((kind, _)) => this + (name, kind)
      case None => this + name
    }
  }

  // A command is a known keyword whose kind differs from Keyword.MINOR.
  def is_command(name: String): Boolean =
    keyword_kind(name).exists(kind => kind != Keyword.MINOR)


  /* heading levels */

  // Nesting level of a heading-like command name: fixed levels 1 to 5 for the
  // names listed below, level 6 for any keyword whose kind satisfies
  // Keyword.theory, and None otherwise.
  def heading_level(name: String): Option[Int] =
    name match {
      // FIXME avoid hard-wired info!?
      case "header" => Some(1)
      case "chapter" => Some(2)
      case "section" | "sect" => Some(3)
      case "subsection" | "subsect" => Some(4)
      case "subsubsection" | "subsubsect" => Some(5)
      case _ =>
        if (keyword_kind(name).exists(kind => Keyword.theory(kind))) Some(6)
        else None
    }

  // Heading level of a command, determined by its name.
  def heading_level(command: Command): Option[Int] =
    heading_level(command.name)


  /* tokenize */

  // Tokenize the complete input using the parsers of the current lexicon;
  // raises an error that includes the input source if parsing fails.
  def scan(input: Reader[Char]): List[Token] =
  {
    import lexicon._

    val all_tokens = rep(token(is_command))
    parseAll(all_tokens, input) match {
      case Success(tokens, _) => tokens
      case _ => error("Unexpected failure of tokenizing input:\n" + input.source.toString)
    }
  }

  // Same as above, reading from a character sequence.
  def scan(input: CharSequence): List[Token] =
    scan(new CharSequenceReader(input))

  // Tokenize input one token at a time, starting from the given scan context
  // and passing the context produced by each token on to the next one.
  // Returns the tokens together with the context reached at the end of input.
  def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
  {
    import lexicon._

    val result = new mutable.ListBuffer[Token]
    var current_context = context
    var remaining: Reader[Char] = new CharSequenceReader(input)
    while (!remaining.atEnd) {
      parse(token_context(is_command, current_context), remaining) match {
        case Success((tok, next_context), rest) =>
          result += tok
          current_context = next_context
          remaining = rest
        case NoSuccess(_, rest) =>
          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
      }
    }
    (result.toList, current_context)
  }
}