author | wenzelm |
Tue, 20 Dec 2016 10:45:20 +0100 | |
changeset 64616 | dc3ec40fe41b |
parent 63867 | fb46c031c841 |
child 65383 | 089f2edefb77 |
permissions | -rw-r--r-- |
34166 | 1 |
/* Title: Pure/Isar/outer_syntax.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
Isabelle/Isar outer syntax. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
43411
0206466ee473
some support for partial scans with explicit context;
wenzelm
parents:
40533
diff
changeset
|
10 |
import scala.collection.mutable |
34166 | 11 |
|
12 |
||
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
13 |
object Outer_Syntax |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
14 |
{ |
58706 | 15 |
/* syntax */ |
16 |
||
17 |
val empty: Outer_Syntax = new Outer_Syntax() |
|
18 |
||
19 |
def init(): Outer_Syntax = new Outer_Syntax(completion = Completion.init()) |
|
20 |
||
21 |
||
22 |
/* string literals */ |
|
23 |
||
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
24 |
def quote_string(str: String): String = |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
25 |
{ |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
26 |
val result = new StringBuilder(str.length + 10) |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
27 |
result += '"' |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
28 |
for (s <- Symbol.iterator(str)) { |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
29 |
if (s.length == 1) { |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
30 |
val c = s(0) |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
31 |
if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') { |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
32 |
result += '\\' |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
33 |
if (c < 10) result += '0' |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
34 |
if (c < 100) result += '0' |
60215 | 35 |
result ++= c.asInstanceOf[Int].toString |
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
36 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
37 |
else result += c |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
38 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
39 |
else result ++= s |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
40 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
41 |
result += '"' |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
42 |
result.toString |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
43 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
44 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
45 |
|
46712 | 46 |
final class Outer_Syntax private( |
58900 | 47 |
val keywords: Keyword.Keywords = Keyword.Keywords.empty, |
53280
c63a016805b9
explicit indication of outer syntax with no tokens;
wenzelm
parents:
52439
diff
changeset
|
48 |
val completion: Completion = Completion.empty, |
63867 | 49 |
val rev_abbrevs: Thy_Header.Abbrevs = Nil, |
55749 | 50 |
val language_context: Completion.Language_Context = Completion.Language_Context.outer, |
63584
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
wenzelm
parents:
63579
diff
changeset
|
51 |
val has_tokens: Boolean = true) |
34166 | 52 |
{ |
58706 | 53 |
/** syntax content **/ |
54 |
||
58900 | 55 |
override def toString: String = keywords.toString |
56393
22f533e6a049
more abstract Prover.Syntax, as proposed by Carst Tankink;
wenzelm
parents:
56314
diff
changeset
|
56 |
|
58695 | 57 |
|
63867 | 58 |
/* keywords */ |
58695 | 59 |
|
63579 | 60 |
def + (name: String, kind: String = "", tags: List[String] = Nil): Outer_Syntax = |
53280
c63a016805b9
explicit indication of outer syntax with no tokens;
wenzelm
parents:
52439
diff
changeset
|
61 |
{ |
63429 | 62 |
val keywords1 = keywords + (name, kind, tags) |
53280
c63a016805b9
explicit indication of outer syntax with no tokens;
wenzelm
parents:
52439
diff
changeset
|
63 |
val completion1 = |
63587
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
wenzelm
parents:
63584
diff
changeset
|
64 |
completion.add_keyword(name). |
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
wenzelm
parents:
63584
diff
changeset
|
65 |
add_abbrevs( |
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
wenzelm
parents:
63584
diff
changeset
|
66 |
(if (Keyword.theory_block.contains(kind)) List((name, name + "\nbegin\n\u0007\nend")) |
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
wenzelm
parents:
63584
diff
changeset
|
67 |
else Nil) ::: |
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
wenzelm
parents:
63584
diff
changeset
|
68 |
(if (Completion.Word_Parsers.is_word(name)) List((name, name)) else Nil)) |
63867 | 69 |
new Outer_Syntax(keywords1, completion1, rev_abbrevs, language_context, true) |
53280
c63a016805b9
explicit indication of outer syntax with no tokens;
wenzelm
parents:
52439
diff
changeset
|
70 |
} |
48706 | 71 |
|
48873 | 72 |
def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax = |
73 |
(this /: keywords) { |
|
63579 | 74 |
case (syntax, (name, ((kind, tags), _))) => |
75 |
syntax + (Symbol.decode(name), kind, tags) + (Symbol.encode(name), kind, tags) |
|
76 |
} |
|
77 |
||
63867 | 78 |
|
79 |
/* abbrevs */ |
|
80 |
||
81 |
def abbrevs: Thy_Header.Abbrevs = rev_abbrevs.reverse |
|
82 |
||
83 |
def add_abbrevs(new_abbrevs: Thy_Header.Abbrevs): Outer_Syntax = |
|
84 |
if (new_abbrevs.isEmpty) this |
|
63579 | 85 |
else { |
86 |
val completion1 = |
|
87 |
completion.add_abbrevs( |
|
63867 | 88 |
(for ((a, b) <- new_abbrevs) yield { |
63579 | 89 |
val a1 = Symbol.decode(a) |
90 |
val a2 = Symbol.encode(a) |
|
91 |
val b1 = Symbol.decode(b) |
|
92 |
List((a1, b1), (a2, b1)) |
|
93 |
}).flatten) |
|
63867 | 94 |
val rev_abbrevs1 = Library.distinct(new_abbrevs) reverse_::: rev_abbrevs |
95 |
new Outer_Syntax(keywords, completion1, rev_abbrevs1, language_context, has_tokens) |
|
46940 | 96 |
} |
34166 | 97 |
|
58695 | 98 |
|
59073 | 99 |
/* merge */ |
100 |
||
63584
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
wenzelm
parents:
63579
diff
changeset
|
101 |
def ++ (other: Outer_Syntax): Outer_Syntax = |
59073 | 102 |
if (this eq other) this |
103 |
else { |
|
63865 | 104 |
val keywords1 = keywords ++ other.keywords |
105 |
val completion1 = completion ++ other.completion |
|
63867 | 106 |
val rev_abbrevs1 = Library.merge(rev_abbrevs, other.rev_abbrevs) |
59077
7e0d3da6e6d8
node-specific syntax, with base_syntax as default;
wenzelm
parents:
59073
diff
changeset
|
107 |
if ((keywords eq keywords1) && (completion eq completion1)) this |
63867 | 108 |
else new Outer_Syntax(keywords1, completion1, rev_abbrevs1, language_context, has_tokens) |
59073 | 109 |
} |
110 |
||
111 |
||
59735 | 112 |
/* load commands */ |
58900 | 113 |
|
63441 | 114 |
def load_command(name: String): Option[List[String]] = keywords.load_commands.get(name) |
58900 | 115 |
def load_commands_in(text: String): Boolean = keywords.load_commands_in(text) |
116 |
||
117 |
||
58706 | 118 |
/* language context */ |
34166 | 119 |
|
58706 | 120 |
def set_language_context(context: Completion.Language_Context): Outer_Syntax = |
63867 | 121 |
new Outer_Syntax(keywords, completion, rev_abbrevs, context, has_tokens) |
58706 | 122 |
|
123 |
def no_tokens: Outer_Syntax = |
|
46969 | 124 |
{ |
58900 | 125 |
require(keywords.is_empty) |
58706 | 126 |
new Outer_Syntax( |
127 |
completion = completion, |
|
63867 | 128 |
rev_abbrevs = rev_abbrevs, |
58706 | 129 |
language_context = language_context, |
130 |
has_tokens = false) |
|
46969 | 131 |
} |
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38471
diff
changeset
|
132 |
|
58706 | 133 |
|
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38471
diff
changeset
|
134 |
|
58706 | 135 |
/** parsing **/ |
34166 | 136 |
|
58706 | 137 |
/* command spans */ |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
138 |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
139 |
def parse_spans(toks: List[Token]): List[Command_Span.Span] = |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
140 |
{ |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
141 |
val result = new mutable.ListBuffer[Command_Span.Span] |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
142 |
val content = new mutable.ListBuffer[Token] |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
143 |
val improper = new mutable.ListBuffer[Token] |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
144 |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
145 |
def ship(span: List[Token]) |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
146 |
{ |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
147 |
val kind = |
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
148 |
if (span.forall(_.is_improper)) Command_Span.Ignored_Span |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
149 |
else if (span.exists(_.is_error)) Command_Span.Malformed_Span |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
150 |
else |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
151 |
span.find(_.is_command) match { |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
152 |
case None => Command_Span.Malformed_Span |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
153 |
case Some(cmd) => |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
154 |
val name = cmd.source |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
155 |
val offset = |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
156 |
(0 /: span.takeWhile(_ != cmd)) { |
64616 | 157 |
case (i, tok) => i + Symbol.length(tok.source) } |
158 |
val end_offset = offset + Symbol.length(name) |
|
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
159 |
val pos = Position.Range(Text.Range(offset, end_offset) + 1) |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
160 |
Command_Span.Command_Span(name, pos) |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
161 |
} |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
162 |
result += Command_Span.Span(kind, span) |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
163 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
164 |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
165 |
def flush() |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
166 |
{ |
59319 | 167 |
if (content.nonEmpty) { ship(content.toList); content.clear } |
168 |
if (improper.nonEmpty) { ship(improper.toList); improper.clear } |
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
169 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
170 |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
171 |
for (tok <- toks) { |
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
172 |
if (tok.is_improper) improper += tok |
63441 | 173 |
else if (keywords.is_before_command(tok) || |
174 |
tok.is_command && |
|
175 |
(!content.exists(keywords.is_before_command(_)) || content.exists(_.is_command))) |
|
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
176 |
{ flush(); content += tok } |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
177 |
else { content ++= improper; improper.clear; content += tok } |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
178 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
179 |
flush() |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
180 |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
181 |
result.toList |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
182 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
183 |
|
57906 | 184 |
def parse_spans(input: CharSequence): List[Command_Span.Span] = |
59083 | 185 |
parse_spans(Token.explode(keywords, input)) |
34166 | 186 |
} |