| author | wenzelm | 
| Tue, 18 Jun 2013 15:31:52 +0200 | |
| changeset 52415 | d9fed6e99a57 | 
| parent 52066 | 83b7b88770c9 | 
| child 52439 | 4cf3f6153eb8 | 
| permissions | -rw-r--r-- | 
| 34166 | 1 | /* Title: Pure/Isar/outer_syntax.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 4 | Isabelle/Isar outer syntax. | |
| 5 | */ | |
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 9 | ||
| 10 | import scala.util.parsing.input.{Reader, CharSequenceReader}
 | |
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 11 | import scala.collection.mutable | 
| 34166 | 12 | |
| 13 | ||
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 14 | object Outer_Syntax | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 15 | {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 16 | def quote_string(str: String): String = | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 17 |   {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 18 | val result = new StringBuilder(str.length + 10) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 19 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 20 |     for (s <- Symbol.iterator(str)) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 21 |       if (s.length == 1) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 22 | val c = s(0) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 23 |         if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 24 | result += '\\' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 25 | if (c < 10) result += '0' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 26 | if (c < 100) result += '0' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 27 | result ++= (c.asInstanceOf[Int].toString) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 28 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 29 | else result += c | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 30 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 31 | else result ++= s | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 32 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 33 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 34 | result.toString | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 35 | } | 
| 46626 | 36 | |
| 46941 | 37 | val empty: Outer_Syntax = new Outer_Syntax() | 
| 48870 
4accee106f0f
clarified initialization of Thy_Load, Thy_Info, Session;
 wenzelm parents: 
48864diff
changeset | 38 | |
| 46941 | 39 | def init(): Outer_Syntax = new Outer_Syntax(completion = Completion.init()) | 
| 48870 
4accee106f0f
clarified initialization of Thy_Load, Thy_Info, Session;
 wenzelm parents: 
48864diff
changeset | 40 | |
| 
4accee106f0f
clarified initialization of Thy_Load, Thy_Info, Session;
 wenzelm parents: 
48864diff
changeset | 41 | def init_pure(): Outer_Syntax = | 
| 
4accee106f0f
clarified initialization of Thy_Load, Thy_Info, Session;
 wenzelm parents: 
48864diff
changeset | 42 |     init() + ("theory", Keyword.THY_BEGIN) + ("ML_file", Keyword.THY_LOAD)
 | 
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 43 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 44 | |
| 46712 | 45 | final class Outer_Syntax private( | 
| 48864 
3ee314ae1e0a
added keyword kind "thy_load" (with optional list of file extensions);
 wenzelm parents: 
48708diff
changeset | 46 | keywords: Map[String, (String, List[String])] = Map.empty, | 
| 46626 | 47 | lexicon: Scan.Lexicon = Scan.Lexicon.empty, | 
| 46941 | 48 | val completion: Completion = Completion.empty) | 
| 34166 | 49 | {
 | 
| 48660 
730ca503e955
static outer syntax based on session specifications;
 wenzelm parents: 
47469diff
changeset | 50 | override def toString: String = | 
| 48864 
3ee314ae1e0a
added keyword kind "thy_load" (with optional list of file extensions);
 wenzelm parents: 
48708diff
changeset | 51 |     (for ((name, (kind, files)) <- keywords) yield {
 | 
| 48660 
730ca503e955
static outer syntax based on session specifications;
 wenzelm parents: 
47469diff
changeset | 52 | if (kind == Keyword.MINOR) quote(name) | 
| 48864 
3ee314ae1e0a
added keyword kind "thy_load" (with optional list of file extensions);
 wenzelm parents: 
48708diff
changeset | 53 | else | 
| 
3ee314ae1e0a
added keyword kind "thy_load" (with optional list of file extensions);
 wenzelm parents: 
48708diff
changeset | 54 | quote(name) + " :: " + quote(kind) + | 
| 
3ee314ae1e0a
added keyword kind "thy_load" (with optional list of file extensions);
 wenzelm parents: 
48708diff
changeset | 55 |         (if (files.isEmpty) "" else " (" + commas_quote(files) + ")")
 | 
| 48671 | 56 |     }).toList.sorted.mkString("keywords\n  ", " and\n  ", "")
 | 
| 48660 
730ca503e955
static outer syntax based on session specifications;
 wenzelm parents: 
47469diff
changeset | 57 | |
| 48864 
3ee314ae1e0a
added keyword kind "thy_load" (with optional list of file extensions);
 wenzelm parents: 
48708diff
changeset | 58 | def keyword_kind_files(name: String): Option[(String, List[String])] = keywords.get(name) | 
| 
3ee314ae1e0a
added keyword kind "thy_load" (with optional list of file extensions);
 wenzelm parents: 
48708diff
changeset | 59 | def keyword_kind(name: String): Option[String] = keyword_kind_files(name).map(_._1) | 
| 38471 
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
 wenzelm parents: 
36956diff
changeset | 60 | |
| 48885 | 61 | def thy_load_commands: List[(String, List[String])] = | 
| 62 | (for ((name, (Keyword.THY_LOAD, files)) <- keywords.iterator) yield (name, files)).toList | |
| 48872 | 63 | |
| 50128 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 64 | def + (name: String, kind: (String, List[String]), replace: Option[String]): Outer_Syntax = | 
| 46626 | 65 | new Outer_Syntax( | 
| 66 | keywords + (name -> kind), | |
| 67 | lexicon + name, | |
| 50128 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 68 |       if (Keyword.control(kind._1) || replace == Some("")) completion
 | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 69 | else completion + (name, replace getOrElse name)) | 
| 34166 | 70 | |
| 50128 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 71 | def + (name: String, kind: (String, List[String])): Outer_Syntax = this + (name, kind, Some(name)) | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 72 | def + (name: String, kind: String): Outer_Syntax = this + (name, (kind, Nil), Some(name)) | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 73 | def + (name: String, replace: Option[String]): Outer_Syntax = | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 74 | this + (name, (Keyword.MINOR, Nil), replace) | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 75 | def + (name: String): Outer_Syntax = this + (name, None) | 
| 48706 | 76 | |
| 48873 | 77 | def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax = | 
| 78 |     (this /: keywords) {
 | |
| 50128 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 79 | case (syntax, ((name, Some((kind, _)), replace))) => | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 80 | syntax + | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 81 | (Symbol.decode(name), kind, replace) + | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 82 | (Symbol.encode(name), kind, replace) | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 83 | case (syntax, ((name, None, replace))) => | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 84 | syntax + | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 85 | (Symbol.decode(name), replace) + | 
| 
599c935aac82
alternative completion for outer syntax keywords;
 wenzelm parents: 
48885diff
changeset | 86 | (Symbol.encode(name), replace) | 
| 46940 | 87 | } | 
| 34166 | 88 | |
| 89 | def is_command(name: String): Boolean = | |
| 40458 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 wenzelm parents: 
40455diff
changeset | 90 |     keyword_kind(name) match {
 | 
| 36947 | 91 | case Some(kind) => kind != Keyword.MINOR | 
| 34166 | 92 | case None => false | 
| 93 | } | |
| 94 | ||
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 95 | def heading_level(name: String): Option[Int] = | 
| 46969 | 96 |   {
 | 
| 97 |     keyword_kind(name) match {
 | |
| 98 | case _ if name == "header" => Some(0) | |
| 99 | case Some(Keyword.THY_HEADING1) => Some(1) | |
| 100 | case Some(Keyword.THY_HEADING2) | Some(Keyword.PRF_HEADING2) => Some(2) | |
| 101 | case Some(Keyword.THY_HEADING3) | Some(Keyword.PRF_HEADING3) => Some(3) | |
| 102 | case Some(Keyword.THY_HEADING4) | Some(Keyword.PRF_HEADING4) => Some(4) | |
| 103 | case Some(kind) if Keyword.theory(kind) => Some(5) | |
| 104 | case _ => None | |
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 105 | } | 
| 46969 | 106 | } | 
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 107 | |
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 108 | def heading_level(command: Command): Option[Int] = | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 109 | heading_level(command.name) | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 110 | |
| 34166 | 111 | |
| 112 | /* tokenize */ | |
| 113 | ||
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
36947diff
changeset | 114 | def scan(input: Reader[Char]): List[Token] = | 
| 52066 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 115 |   {
 | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 116 | import lexicon._ | 
| 34166 | 117 | |
| 52066 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 118 |     parseAll(rep(token(is_command)), input) match {
 | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 119 | case Success(tokens, _) => tokens | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 120 |       case _ => error("Unexpected failure of tokenizing input:\n" + input.source.toString)
 | 
| 34166 | 121 | } | 
| 52066 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 122 | } | 
| 34166 | 123 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
36947diff
changeset | 124 | def scan(input: CharSequence): List[Token] = | 
| 34166 | 125 | scan(new CharSequenceReader(input)) | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 126 | |
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 127 | def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) = | 
| 52066 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 128 |   {
 | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 129 | import lexicon._ | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 130 | |
| 52066 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 131 | var in: Reader[Char] = new CharSequenceReader(input) | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 132 | val toks = new mutable.ListBuffer[Token] | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 133 | var ctxt = context | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 134 |     while (!in.atEnd) {
 | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 135 |       parse(token_context(is_command, ctxt), in) match {
 | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 136 |         case Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
 | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 137 | case NoSuccess(_, rest) => | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 138 |           error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
 | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 139 | } | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 140 | } | 
| 52066 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 141 | (toks.toList, ctxt) | 
| 
83b7b88770c9
discontinued odd workaround for scala-2.9.2, which is hopefully obsolete in scala-2.10.x;
 wenzelm parents: 
50428diff
changeset | 142 | } | 
| 34166 | 143 | } |