| author | wenzelm | 
| Thu, 01 Dec 2011 20:54:48 +0100 | |
| changeset 45717 | b4e7b9968e60 | 
| parent 44051 | 2ec66075a75c | 
| child 46624 | dc4c72092088 | 
| permissions | -rw-r--r-- | 
| 34166 | 1 | /* Title: Pure/Isar/outer_syntax.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 4 | Isabelle/Isar outer syntax. | |
| 5 | */ | |
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 9 | ||
| 10 | import scala.util.parsing.input.{Reader, CharSequenceReader}
 | |
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 11 | import scala.collection.mutable | 
| 34166 | 12 | |
| 13 | ||
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 14 | object Outer_Syntax | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 15 | {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 16 | def quote_string(str: String): String = | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 17 |   {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 18 | val result = new StringBuilder(str.length + 10) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 19 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 20 |     for (s <- Symbol.iterator(str)) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 21 |       if (s.length == 1) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 22 | val c = s(0) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 23 |         if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 24 | result += '\\' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 25 | if (c < 10) result += '0' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 26 | if (c < 100) result += '0' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 27 | result ++= (c.asInstanceOf[Int].toString) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 28 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 29 | else result += c | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 30 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 31 | else result ++= s | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 32 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 33 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 34 | result.toString | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 35 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 36 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 37 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43455diff
changeset | 38 | class Outer_Syntax | 
| 34166 | 39 | {
 | 
| 36947 | 40 |   protected val keywords: Map[String, String] = Map((";" -> Keyword.DIAG))
 | 
| 34166 | 41 | protected val lexicon: Scan.Lexicon = Scan.Lexicon.empty | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43455diff
changeset | 42 | lazy val completion: Completion = (new Completion).add_symbols // FIXME odd initialization | 
| 34166 | 43 | |
| 38471 
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
 wenzelm parents: 
36956diff
changeset | 44 | def keyword_kind(name: String): Option[String] = keywords.get(name) | 
| 
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
 wenzelm parents: 
36956diff
changeset | 45 | |
| 40533 
e38e80686ce5
somewhat adhoc replacement for 'thus' and 'hence';
 wenzelm parents: 
40459diff
changeset | 46 | def + (name: String, kind: String, replace: String): Outer_Syntax = | 
| 34166 | 47 |   {
 | 
| 48 | val new_keywords = keywords + (name -> kind) | |
| 49 | val new_lexicon = lexicon + name | |
| 44051 
2ec66075a75c
avoid pointless completion of illegal control commands;
 wenzelm parents: 
43774diff
changeset | 50 | val new_completion = | 
| 
2ec66075a75c
avoid pointless completion of illegal control commands;
 wenzelm parents: 
43774diff
changeset | 51 | if (Keyword.control(kind)) completion | 
| 
2ec66075a75c
avoid pointless completion of illegal control commands;
 wenzelm parents: 
43774diff
changeset | 52 | else completion + (name, replace) | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43455diff
changeset | 53 |     new Outer_Syntax {
 | 
| 34166 | 54 | override val lexicon = new_lexicon | 
| 55 | override val keywords = new_keywords | |
| 56 | override lazy val completion = new_completion | |
| 57 | } | |
| 58 | } | |
| 59 | ||
| 40533 
e38e80686ce5
somewhat adhoc replacement for 'thus' and 'hence';
 wenzelm parents: 
40459diff
changeset | 60 | def + (name: String, kind: String): Outer_Syntax = this + (name, kind, name) | 
| 
e38e80686ce5
somewhat adhoc replacement for 'thus' and 'hence';
 wenzelm parents: 
40459diff
changeset | 61 | |
| 36947 | 62 | def + (name: String): Outer_Syntax = this + (name, Keyword.MINOR) | 
| 34166 | 63 | |
| 64 | def is_command(name: String): Boolean = | |
| 40458 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 wenzelm parents: 
40455diff
changeset | 65 |     keyword_kind(name) match {
 | 
| 36947 | 66 | case Some(kind) => kind != Keyword.MINOR | 
| 34166 | 67 | case None => false | 
| 68 | } | |
| 69 | ||
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 70 | def heading_level(name: String): Option[Int] = | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 71 |     name match {
 | 
| 40458 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 wenzelm parents: 
40455diff
changeset | 72 | // FIXME avoid hard-wired info!? | 
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 73 | case "header" => Some(1) | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 74 | case "chapter" => Some(2) | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 75 | case "section" | "sect" => Some(3) | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 76 | case "subsection" | "subsect" => Some(4) | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 77 | case "subsubsection" | "subsubsect" => Some(5) | 
| 40458 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 wenzelm parents: 
40455diff
changeset | 78 | case _ => | 
| 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 wenzelm parents: 
40455diff
changeset | 79 |         keyword_kind(name) match {
 | 
| 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 wenzelm parents: 
40455diff
changeset | 80 | case Some(kind) if Keyword.theory(kind) => Some(6) | 
| 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 wenzelm parents: 
40455diff
changeset | 81 | case _ => None | 
| 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 wenzelm parents: 
40455diff
changeset | 82 | } | 
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 83 | } | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 84 | |
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 85 | def heading_level(command: Command): Option[Int] = | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 86 | heading_level(command.name) | 
| 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 87 | |
| 34166 | 88 | |
| 89 | /* tokenize */ | |
| 90 | ||
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
36947diff
changeset | 91 | def scan(input: Reader[Char]): List[Token] = | 
| 34166 | 92 |   {
 | 
| 93 | import lexicon._ | |
| 94 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43455diff
changeset | 95 |     parseAll(rep(token(is_command)), input) match {
 | 
| 34166 | 96 | case Success(tokens, _) => tokens | 
| 34264 | 97 |       case _ => error("Unexpected failure of tokenizing input:\n" + input.source.toString)
 | 
| 34166 | 98 | } | 
| 99 | } | |
| 100 | ||
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
36947diff
changeset | 101 | def scan(input: CharSequence): List[Token] = | 
| 34166 | 102 | scan(new CharSequenceReader(input)) | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 103 | |
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 104 | def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) = | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 105 |   {
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 106 | import lexicon._ | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 107 | |
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 108 | var in: Reader[Char] = new CharSequenceReader(input) | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 109 | val toks = new mutable.ListBuffer[Token] | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 110 | var ctxt = context | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 111 |     while (!in.atEnd) {
 | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43455diff
changeset | 112 |       parse(token_context(is_command, ctxt), in) match {
 | 
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 113 |         case Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 114 | case NoSuccess(_, rest) => | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 115 |           error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
 | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 116 | } | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 117 | } | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 118 | (toks.toList, ctxt) | 
| 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 119 | } | 
| 34166 | 120 | } |