author | wenzelm |
Mon, 27 Feb 2012 17:13:25 +0100 | |
changeset 46712 | 8650d9a95736 |
parent 46626 | a02115865bcc |
child 46940 | a40be2f10ca9 |
permissions | -rw-r--r-- |
34166 | 1 |
/*  Title:      Pure/Isar/outer_syntax.scala
    Author:     Makarius

Isabelle/Isar outer syntax.
*/
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
10 |
import scala.util.parsing.input.{Reader, CharSequenceReader} |
|
43411
0206466ee473
some support for partial scans with explicit context;
wenzelm
parents:
40533
diff
changeset
|
11 |
import scala.collection.mutable |
34166 | 12 |
|
13 |
||
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
14 |
object Outer_Syntax
{
  /* string quotation */

  // Wrap str in double quotes.  A symbol (as delivered by Symbol.iterator)
  // that consists of a single character is written as backslash followed by
  // its decimal character code, zero-padded to three digits, if that
  // character is a backslash, a double quote, or below 32 and neither
  // YXML.X nor YXML.Y.  Every other symbol is copied unchanged.
  def quote_string(str: String): String =
  {
    def escaped(ch: Char): Boolean =
      (ch < 32 && ch != YXML.X && ch != YXML.Y) || ch == '\\' || ch == '"'

    val buf = new StringBuilder(str.length + 10)
    buf += '"'
    Symbol.iterator(str).foreach { sym =>
      if (sym.length != 1) buf ++= sym
      else {
        val ch = sym(0)
        if (escaped(ch)) {
          val code = ch.toInt.toString
          buf += '\\'
          // pad to three digits; a non-positive count yields the empty string
          buf ++= "0" * (3 - code.length)
          buf ++= code
        }
        else buf += ch
      }
    }
    buf += '"'
    buf.toString
  }


  /* initial syntax */

  // Fresh instance built from the default constructor arguments.
  def init(): Outer_Syntax = new Outer_Syntax()
}
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
39 |
|
46712 | 40 |
// Immutable table of outer syntax keywords: `keywords` maps each keyword name
// to its kind, `lexicon` supplies the token parsers used by the scan
// operations below, and `completion` is extended alongside the keywords.
final class Outer_Syntax private(
  keywords: Map[String, String] = Map((";" -> Keyword.DIAG)),
  lexicon: Scan.Lexicon = Scan.Lexicon.empty,
  val completion: Completion = Completion.init())
{
  /* keywords */

  // Kind registered for the given keyword name, if any.
  def keyword_kind(name: String): Option[String] = keywords.get(name)

  // New syntax with `name` added as keyword of the given kind.  The completion
  // entry (name, replace) is only added when Keyword.control(kind) is false.
  def + (name: String, kind: String, replace: String): Outer_Syntax =
  {
    val keywords1 = keywords + (name -> kind)
    val lexicon1 = lexicon + name
    val completion1 =
      if (Keyword.control(kind)) completion
      else completion + (name, replace)
    new Outer_Syntax(keywords1, lexicon1, completion1)
  }

  // Same as above, with the keyword name itself as replacement text.
  def + (name: String, kind: String): Outer_Syntax = this.+(name, kind, name)

  // Same as above, with kind Keyword.MINOR.
  def + (name: String): Outer_Syntax = this.+(name, Keyword.MINOR)

  // A command is any known keyword whose kind differs from Keyword.MINOR.
  def is_command(name: String): Boolean =
    keyword_kind(name).exists(kind => kind != Keyword.MINOR)


  /* heading levels */

  // Nesting level of a heading-like keyword: fixed levels 1-5 for the section
  // markup names below, level 6 for keywords whose kind satisfies
  // Keyword.theory, and None otherwise.
  def heading_level(name: String): Option[Int] =
    name match {
      // FIXME avoid hard-wired info!?
      case "header" => Some(1)
      case "chapter" => Some(2)
      case "section" | "sect" => Some(3)
      case "subsection" | "subsect" => Some(4)
      case "subsubsection" | "subsubsect" => Some(5)
      case _ => keyword_kind(name).filter(kind => Keyword.theory(kind)).map(_ => 6)
    }

  // Heading level determined by the name of the command.
  def heading_level(command: Command): Option[Int] = heading_level(command.name)


  /* tokenize */

  // Tokenize the complete input; raises an error if the token parser does not
  // succeed on all of it.
  def scan(input: Reader[Char]): List[Token] =
  {
    import lexicon._

    val all_tokens = rep(token(is_command))
    parseAll(all_tokens, input) match {
      case Success(tokens, _) => tokens
      case _ => error("Unexpected failure of tokenizing input:\n" + input.source.toString)
    }
  }

  def scan(input: CharSequence): List[Token] = scan(new CharSequenceReader(input))

  // Tokenize the input one token at a time, threading the scan context through
  // each step.  Returns the tokens in input order together with the context
  // after the last token; raises an error on the first unparsable position.
  def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
  {
    import lexicon._

    // rev_toks accumulates the tokens seen so far in reverse order
    @scala.annotation.tailrec
    def loop(in: Reader[Char], ctxt: Scan.Context, rev_toks: List[Token])
      : (List[Token], Scan.Context) =
    {
      if (in.atEnd) (rev_toks.reverse, ctxt)
      else
        parse(token_context(is_command, ctxt), in) match {
          case Success((tok, ctxt1), rest) => loop(rest, ctxt1, tok :: rev_toks)
          case NoSuccess(_, rest) =>
            error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
        }
    }

    loop(new CharSequenceReader(input), context, Nil)
  }
}