author | wenzelm |
Mon, 20 Aug 2012 14:09:09 +0200 | |
changeset 48864 | 3ee314ae1e0a |
parent 48708 | 189ece4b4ff1 |
child 48870 | 4accee106f0f |
permissions | -rw-r--r-- |
34166 | 1 |
/*  Title:      Pure/Isar/outer_syntax.scala
    Author:     Makarius

Isabelle/Isar outer syntax.
*/
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
10 |
import scala.util.parsing.input.{Reader, CharSequenceReader} |
|
43411
0206466ee473
some support for partial scans with explicit context;
wenzelm
parents:
40533
diff
changeset
|
11 |
import scala.collection.mutable |
34166 | 12 |
|
13 |
||
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
14 |
object Outer_Syntax
{
  /* quoted strings */

  /**
   * Wraps `str` in double quotes.  The text is traversed symbol by symbol
   * (via `Symbol.iterator`); a symbol consisting of a single character is
   * replaced by a backslash followed by its zero-padded three-digit decimal
   * code when that character is
   *   - below 32 and neither `YXML.X` nor `YXML.Y`, or
   *   - a backslash, or
   *   - a double quote.
   * Every other symbol is copied unchanged.
   */
  def quote_string(str: String): String =
  {
    // characters that have to be written as a decimal escape
    def needs_escape(c: Char): Boolean =
      (c < 32 && c != YXML.X && c != YXML.Y) || c == '\\' || c == '"'

    val buf = new StringBuilder(str.length + 10)
    buf += '"'
    Symbol.iterator(str).foreach(sym =>
      if (sym.length == 1 && needs_escape(sym(0))) {
        val digits = sym(0).toInt.toString
        buf += '\\'
        // pad to three decimal digits
        buf ++= "0" * (3 - digits.length)
        buf ++= digits
      }
      else buf ++= sym)
    buf += '"'
    buf.toString
  }


  /* basic instances */

  // syntax built from the constructor defaults only
  val empty: Outer_Syntax = new Outer_Syntax()

  // like the default instance, but with the completion taken from Completion.init()
  def init(): Outer_Syntax = new Outer_Syntax(completion = Completion.init())
}
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
40 |
|
46712 | 41 |
/**
 * Immutable table of outer syntax keywords.
 *
 * @param keywords    keyword name -> (kind, list of file names/extensions)
 * @param lexicon     scanner lexicon holding all keyword names
 * @param completion  completion table maintained alongside the keywords
 */
final class Outer_Syntax private(
  keywords: Map[String, (String, List[String])] = Map.empty,
  lexicon: Scan.Lexicon = Scan.Lexicon.empty,
  val completion: Completion = Completion.empty)
{
  /* printing */

  // sorted keyword declarations; minor keywords are shown by name only
  override def toString: String =
  {
    def show(name: String, kind: String, files: List[String]): String =
      if (kind == Keyword.MINOR) quote(name)
      else {
        val suffix = if (files.isEmpty) "" else " (" + commas_quote(files) + ")"
        quote(name) + " :: " + quote(kind) + suffix
      }

    val entries = keywords.toList.map({ case (name, (kind, files)) => show(name, kind, files) })
    entries.sorted.mkString("keywords\n ", " and\n ", "")
  }


  /* keyword lookup */

  // full table entry (kind and files) for the given name, if present
  def keyword_kind_files(name: String): Option[(String, List[String])] =
    keywords.get(name)

  // only the kind component of the table entry
  def keyword_kind(name: String): Option[String] =
    keyword_kind_files(name).map({ case (kind, _) => kind })


  /* extending the syntax */

  // New syntax with `name` added to keyword table and lexicon.  The completion
  // table receives (name, replace) unless Keyword.control holds for the kind.
  def + (name: String, kind: (String, List[String]), replace: String): Outer_Syntax =
  {
    val keywords1 = keywords + (name -> kind)
    val lexicon1 = lexicon + name
    val completion1 =
      if (Keyword.control(kind._1)) completion
      else completion + (name, replace)
    new Outer_Syntax(keywords1, lexicon1, completion1)
  }

  // variant using the keyword name itself as replacement text
  def + (name: String, kind: (String, List[String])): Outer_Syntax =
    this.+(name, kind, name)

  // variant for a kind without files
  def + (name: String, kind: String): Outer_Syntax =
    this.+(name, (kind, Nil), name)

  // variant for a minor keyword
  def + (name: String): Outer_Syntax =
    this.+(name, Keyword.MINOR)

  // Adds all keywords of the header, each one both in its Symbol.decode and
  // its Symbol.encode form; entries without kind specification are added
  // by name only.
  def add_keywords(header: Document.Node.Header): Outer_Syntax =
    header.keywords.foldLeft(this) {
      case (syntax, (name, Some((kind, _)))) =>
        val decoded = syntax + (Symbol.decode(name), kind)
        decoded + (Symbol.encode(name), kind)
      case (syntax, (name, None)) =>
        val decoded = syntax + Symbol.decode(name)
        decoded + Symbol.encode(name)
    }


  /* command keywords and headings */

  // true for known keywords whose kind differs from Keyword.MINOR
  def is_command(name: String): Boolean =
    keyword_kind(name).exists(kind => kind != Keyword.MINOR)

  // Nesting level of a heading-like command: "header" is level 0, the heading
  // kinds map to levels 1-4, any other kind accepted by Keyword.theory is
  // level 5, and everything else has no level.
  def heading_level(name: String): Option[Int] =
    if (name == "header") Some(0)
    else
      keyword_kind(name) flatMap {
        case Keyword.THY_HEADING1 => Some(1)
        case Keyword.THY_HEADING2 | Keyword.PRF_HEADING2 => Some(2)
        case Keyword.THY_HEADING3 | Keyword.PRF_HEADING3 => Some(3)
        case Keyword.THY_HEADING4 | Keyword.PRF_HEADING4 => Some(4)
        case kind if Keyword.theory(kind) => Some(5)
        case _ => None
      }

  // heading level determined by the name of the command
  def heading_level(command: Command): Option[Int] =
    heading_level(command.name)


  /* tokenize */

  // Tokenizes the complete input; any parse result other than success is
  // reported via error, together with the input source.
  def scan(input: Reader[Char]): List[Token] =
  {
    import lexicon._

    val outcome = parseAll(rep(token(is_command)), input)
    if (outcome.successful) outcome.get
    else error("Unexpected failure of tokenizing input:\n" + input.source.toString)
  }

  def scan(input: CharSequence): List[Token] =
    scan(new CharSequenceReader(input))

  // Tokenizes the input one token at a time, starting from the given scanner
  // context and threading the updated context through each step.  Returns the
  // tokens together with the context reached at the end of input.
  def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
  {
    import lexicon._

    val tokens = new mutable.ListBuffer[Token]

    // consumes one token per step until the reader is exhausted
    def loop(remaining: Reader[Char], current: Scan.Context): Scan.Context =
      if (remaining.atEnd) current
      else
        parse(token_context(is_command, current), remaining) match {
          case Success((tok, next), rest) =>
            tokens += tok
            loop(rest, next)
          case NoSuccess(_, rest) =>
            error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
        }

    val final_context = loop(new CharSequenceReader(input), context)
    (tokens.toList, final_context)
  }
}