| author | Cezary Kaliszyk <cezarykaliszyk@gmail.com> | 
| Fri, 03 Feb 2012 15:51:10 +0100 | |
| changeset 46404 | 7736068b9f56 | 
| parent 44051 | 2ec66075a75c | 
| child 46624 | dc4c72092088 | 
| permissions | -rw-r--r-- | 
| 34166 | 1  | 
/* Title: Pure/Isar/outer_syntax.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
4  | 
Isabelle/Isar outer syntax.  | 
|
5  | 
*/  | 
|
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
9  | 
||
10  | 
import scala.util.parsing.input.{Reader, CharSequenceReader}
 | 
|
| 
43411
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
11  | 
import scala.collection.mutable  | 
| 34166 | 12  | 
|
13  | 
||
| 
43774
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
14  | 
object Outer_Syntax  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
15  | 
{
 | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
16  | 
def quote_string(str: String): String =  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
17  | 
  {
 | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
18  | 
val result = new StringBuilder(str.length + 10)  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
19  | 
result += '"'  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
20  | 
    for (s <- Symbol.iterator(str)) {
 | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
21  | 
      if (s.length == 1) {
 | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
22  | 
val c = s(0)  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
23  | 
        if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') {
 | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
24  | 
result += '\\'  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
25  | 
if (c < 10) result += '0'  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
26  | 
if (c < 100) result += '0'  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
27  | 
result ++= (c.asInstanceOf[Int].toString)  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
28  | 
}  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
29  | 
else result += c  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
30  | 
}  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
31  | 
else result ++= s  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
32  | 
}  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
33  | 
result += '"'  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
34  | 
result.toString  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
35  | 
}  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
36  | 
}  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
37  | 
|
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43455 
diff
changeset
 | 
38  | 
class Outer_Syntax  | 
| 34166 | 39  | 
{
 | 
| 36947 | 40  | 
  protected val keywords: Map[String, String] = Map((";" -> Keyword.DIAG))
 | 
| 34166 | 41  | 
protected val lexicon: Scan.Lexicon = Scan.Lexicon.empty  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43455 
diff
changeset
 | 
42  | 
lazy val completion: Completion = (new Completion).add_symbols // FIXME odd initialization  | 
| 34166 | 43  | 
|
| 
38471
 
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
 
wenzelm 
parents: 
36956 
diff
changeset
 | 
44  | 
def keyword_kind(name: String): Option[String] = keywords.get(name)  | 
| 
 
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
 
wenzelm 
parents: 
36956 
diff
changeset
 | 
45  | 
|
| 
40533
 
e38e80686ce5
somewhat adhoc replacement for 'thus' and 'hence';
 
wenzelm 
parents: 
40459 
diff
changeset
 | 
46  | 
def + (name: String, kind: String, replace: String): Outer_Syntax =  | 
| 34166 | 47  | 
  {
 | 
48  | 
val new_keywords = keywords + (name -> kind)  | 
|
49  | 
val new_lexicon = lexicon + name  | 
|
| 
44051
 
2ec66075a75c
avoid pointless completion of illegal control commands;
 
wenzelm 
parents: 
43774 
diff
changeset
 | 
50  | 
val new_completion =  | 
| 
 
2ec66075a75c
avoid pointless completion of illegal control commands;
 
wenzelm 
parents: 
43774 
diff
changeset
 | 
51  | 
if (Keyword.control(kind)) completion  | 
| 
 
2ec66075a75c
avoid pointless completion of illegal control commands;
 
wenzelm 
parents: 
43774 
diff
changeset
 | 
52  | 
else completion + (name, replace)  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43455 
diff
changeset
 | 
53  | 
    new Outer_Syntax {
 | 
| 34166 | 54  | 
override val lexicon = new_lexicon  | 
55  | 
override val keywords = new_keywords  | 
|
56  | 
override lazy val completion = new_completion  | 
|
57  | 
}  | 
|
58  | 
}  | 
|
59  | 
||
| 
40533
 
e38e80686ce5
somewhat adhoc replacement for 'thus' and 'hence';
 
wenzelm 
parents: 
40459 
diff
changeset
 | 
60  | 
def + (name: String, kind: String): Outer_Syntax = this + (name, kind, name)  | 
| 
 
e38e80686ce5
somewhat adhoc replacement for 'thus' and 'hence';
 
wenzelm 
parents: 
40459 
diff
changeset
 | 
61  | 
|
| 36947 | 62  | 
def + (name: String): Outer_Syntax = this + (name, Keyword.MINOR)  | 
| 34166 | 63  | 
|
64  | 
def is_command(name: String): Boolean =  | 
|
| 
40458
 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 
wenzelm 
parents: 
40455 
diff
changeset
 | 
65  | 
    keyword_kind(name) match {
 | 
| 36947 | 66  | 
case Some(kind) => kind != Keyword.MINOR  | 
| 34166 | 67  | 
case None => false  | 
68  | 
}  | 
|
69  | 
||
| 
40454
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
70  | 
def heading_level(name: String): Option[Int] =  | 
| 
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
71  | 
    name match {
 | 
| 
40458
 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 
wenzelm 
parents: 
40455 
diff
changeset
 | 
72  | 
// FIXME avoid hard-wired info!?  | 
| 
40454
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
73  | 
case "header" => Some(1)  | 
| 
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
74  | 
case "chapter" => Some(2)  | 
| 
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
75  | 
case "section" | "sect" => Some(3)  | 
| 
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
76  | 
case "subsection" | "subsect" => Some(4)  | 
| 
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
77  | 
case "subsubsection" | "subsubsect" => Some(5)  | 
| 
40458
 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 
wenzelm 
parents: 
40455 
diff
changeset
 | 
78  | 
case _ =>  | 
| 
 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 
wenzelm 
parents: 
40455 
diff
changeset
 | 
79  | 
        keyword_kind(name) match {
 | 
| 
 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 
wenzelm 
parents: 
40455 
diff
changeset
 | 
80  | 
case Some(kind) if Keyword.theory(kind) => Some(6)  | 
| 
 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 
wenzelm 
parents: 
40455 
diff
changeset
 | 
81  | 
case _ => None  | 
| 
 
12c8c64203b3
treat main theory commands like headings, and nest anything else inside;
 
wenzelm 
parents: 
40455 
diff
changeset
 | 
82  | 
}  | 
| 
40454
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
83  | 
}  | 
| 
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
84  | 
|
| 
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
85  | 
def heading_level(command: Command): Option[Int] =  | 
| 
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
86  | 
heading_level(command.name)  | 
| 
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
87  | 
|
| 34166 | 88  | 
|
89  | 
/* tokenize */  | 
|
90  | 
||
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
36947 
diff
changeset
 | 
91  | 
def scan(input: Reader[Char]): List[Token] =  | 
| 34166 | 92  | 
  {
 | 
93  | 
import lexicon._  | 
|
94  | 
||
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43455 
diff
changeset
 | 
95  | 
    parseAll(rep(token(is_command)), input) match {
 | 
| 34166 | 96  | 
case Success(tokens, _) => tokens  | 
| 34264 | 97  | 
      case _ => error("Unexpected failure of tokenizing input:\n" + input.source.toString)
 | 
| 34166 | 98  | 
}  | 
99  | 
}  | 
|
100  | 
||
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
36947 
diff
changeset
 | 
101  | 
def scan(input: CharSequence): List[Token] =  | 
| 34166 | 102  | 
scan(new CharSequenceReader(input))  | 
| 
43411
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
103  | 
|
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
104  | 
def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =  | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
105  | 
  {
 | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
106  | 
import lexicon._  | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
107  | 
|
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
108  | 
var in: Reader[Char] = new CharSequenceReader(input)  | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
109  | 
val toks = new mutable.ListBuffer[Token]  | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
110  | 
var ctxt = context  | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
111  | 
    while (!in.atEnd) {
 | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43455 
diff
changeset
 | 
112  | 
      parse(token_context(is_command, ctxt), in) match {
 | 
| 
43411
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
113  | 
        case Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
 | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
114  | 
case NoSuccess(_, rest) =>  | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
115  | 
          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
 | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
116  | 
}  | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
117  | 
}  | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
118  | 
(toks.toList, ctxt)  | 
| 
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
119  | 
}  | 
| 34166 | 120  | 
}  |