| author | wenzelm | 
| Sat, 11 Aug 2012 19:34:36 +0200 | |
| changeset 48772 | e46cd0d26481 | 
| parent 48708 | 189ece4b4ff1 | 
| child 48864 | 3ee314ae1e0a | 
| permissions | -rw-r--r-- | 
| 34166 | 1  | 
/* Title: Pure/Isar/outer_syntax.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
4  | 
Isabelle/Isar outer syntax.  | 
|
5  | 
*/  | 
|
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
9  | 
||
10  | 
import scala.util.parsing.input.{Reader, CharSequenceReader}
 | 
|
| 
43411
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
11  | 
import scala.collection.mutable  | 
| 34166 | 12  | 
|
13  | 
||
| 
43774
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
14  | 
object Outer_Syntax
{
  /* quoted strings */

  // Wraps str in double quotes.  Single-character symbols that are either
  // below 32 (other than YXML.X and YXML.Y), a backslash, or a double quote
  // are emitted as a backslash followed by their zero-padded decimal code;
  // every other symbol is copied through unchanged.
  def quote_string(str: String): String =
  {
    val buf = new StringBuilder(str.length + 10)
    buf += '"'
    for (sym <- Symbol.iterator(str)) {
      if (sym.length != 1) buf ++= sym
      else {
        val ch = sym(0)
        val is_control = ch < 32 && ch != YXML.X && ch != YXML.Y
        if (is_control || ch == '\\' || ch == '"') {
          // pad the decimal code to three digits
          val code = ch.toInt.toString
          buf += '\\'
          buf ++= ("0" * (3 - code.length))
          buf ++= code
        }
        else buf += ch
      }
    }
    buf += '"'
    buf.toString
  }


  /* basic instances */

  val empty: Outer_Syntax = new Outer_Syntax()

  def init(): Outer_Syntax = new Outer_Syntax(completion = Completion.init())
}
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
40  | 
|
| 46712 | 41  | 
final class Outer_Syntax private(
  keywords: Map[String, String] = Map.empty,
  lexicon: Scan.Lexicon = Scan.Lexicon.empty,
  val completion: Completion = Completion.empty)
{
  /* printed representation */

  // Sorted listing of all keywords; non-minor keywords also show their kind.
  override def toString: String =
  {
    val entries =
      keywords.toList.map({
        case (name, kind) =>
          if (kind == Keyword.MINOR) quote(name)
          else quote(name) + " :: " + quote(kind)
      })
    entries.sorted.mkString("keywords\n  ", " and\n  ", "")
  }


  /* keyword table */

  def keyword_kind(name: String): Option[String] = keywords.get(name)

  // Adds a keyword with its kind to the table and the lexicon; the
  // completion is extended by (name, replace) unless Keyword.control(kind).
  def + (name: String, kind: String, replace: String): Outer_Syntax =
  {
    val keywords1 = keywords + (name -> kind)
    val lexicon1 = lexicon + name
    val completion1 =
      if (Keyword.control(kind)) completion
      else completion + (name, replace)
    new Outer_Syntax(keywords1, lexicon1, completion1)
  }

  def + (name: String, kind: String): Outer_Syntax = this + (name, kind, name)

  def + (name: String): Outer_Syntax = this + (name, Keyword.MINOR)

  // Adds each keyword of the header twice: once under Symbol.decode(name)
  // and once under Symbol.encode(name).
  def add_keywords(header: Document.Node.Header): Outer_Syntax =
    header.keywords.foldLeft(this) {
      case (syntax, (name, Some((kind, _)))) =>
        val decoded = syntax + (Symbol.decode(name), kind)
        decoded + (Symbol.encode(name), kind)
      case (syntax, (name, None)) =>
        val decoded = syntax + Symbol.decode(name)
        decoded + Symbol.encode(name)
    }

  // A command is any known keyword whose kind is not Keyword.MINOR.
  def is_command(name: String): Boolean =
    keyword_kind(name).exists(kind => kind != Keyword.MINOR)


  /* heading levels */

  // "header" is level 0 regardless of its kind; heading kinds give levels
  // 1 to 4, any other kind accepted by Keyword.theory gives 5.
  def heading_level(name: String): Option[Int] =
  {
    if (name == "header") Some(0)
    else
      keyword_kind(name) match {
        case None => None
        case Some(kind) =>
          kind match {
            case Keyword.THY_HEADING1 => Some(1)
            case Keyword.THY_HEADING2 | Keyword.PRF_HEADING2 => Some(2)
            case Keyword.THY_HEADING3 | Keyword.PRF_HEADING3 => Some(3)
            case Keyword.THY_HEADING4 | Keyword.PRF_HEADING4 => Some(4)
            case _ if Keyword.theory(kind) => Some(5)
            case _ => None
          }
      }
  }

  def heading_level(command: Command): Option[Int] =
    heading_level(command.name)


  /* tokenize */

  // Tokenizes the complete input; anything but full success is an error.
  def scan(input: Reader[Char]): List[Token] =
  {
    import lexicon._

    val all_tokens = rep(token(is_command))
    parseAll(all_tokens, input) match {
      case Success(tokens, _) => tokens
      case _ => error("Unexpected failure of tokenizing input:\n" + input.source.toString)
    }
  }

  def scan(input: CharSequence): List[Token] =
    scan(new CharSequenceReader(input))

  // Tokenizes input one token at a time, threading the scan context from
  // each token to the next; returns the tokens with the final context.
  def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
  {
    import lexicon._

    val toks = new mutable.ListBuffer[Token]

    @scala.annotation.tailrec
    def loop(in: Reader[Char], ctxt: Scan.Context): Scan.Context =
      if (in.atEnd) ctxt
      else
        parse(token_context(is_command, ctxt), in) match {
          case Success((tok, ctxt1), rest) =>
            toks += tok
            loop(rest, ctxt1)
          case NoSuccess(_, rest) =>
            error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
        }

    val final_ctxt = loop(new CharSequenceReader(input), context)
    (toks.toList, final_ctxt)
  }
}