| author | wenzelm |
| Wed, 11 Dec 2024 12:03:01 +0100 | |
| changeset 81580 | 2e7073976c25 |
| parent 80487 | e25c6d4c219c |
| permissions | -rw-r--r-- |
| 34166 | 1 |
/* Title: Pure/Isar/outer_syntax.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
Isabelle/Isar outer syntax. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
|
43411
0206466ee473
some support for partial scans with explicit context;
wenzelm
parents:
40533
diff
changeset
|
10 |
import scala.collection.mutable |
| 34166 | 11 |
|
12 |
||
| 75393 | 13 |
object Outer_Syntax {
|
| 58706 | 14 |
/* syntax */ |
15 |
||
16 |
val empty: Outer_Syntax = new Outer_Syntax() |
|
17 |
||
| 73359 | 18 |
def merge(syns: List[Outer_Syntax]): Outer_Syntax = syns.foldLeft(empty)(_ ++ _) |
|
66717
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
19 |
|
| 58706 | 20 |
|
21 |
/* string literals */ |
|
22 |
||
| 80441 | 23 |
def quote_string(str: String): String = |
24 |
Library.string_builder(hint = str.length + 10) { result =>
|
|
25 |
result += '"' |
|
26 |
for (s <- Symbol.iterator(str)) {
|
|
27 |
if (s.length == 1) {
|
|
28 |
val c = s(0) |
|
| 80487 | 29 |
if (c < 32 && c != YXML.X_char && c != YXML.Y_char || c == '\\' || c == '"') {
|
| 80441 | 30 |
result += '\\' |
31 |
if (c < 10) result += '0' |
|
32 |
if (c < 100) result += '0' |
|
33 |
result ++= c.asInstanceOf[Int].toString |
|
34 |
} |
|
35 |
else result += c |
|
|
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
36 |
} |
| 80441 | 37 |
else result ++= s |
|
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
38 |
} |
| 80441 | 39 |
result += '"' |
|
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
40 |
} |
|
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
41 |
} |
|
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
42 |
|
| 46712 | 43 |
final class Outer_Syntax private( |
| 58900 | 44 |
val keywords: Keyword.Keywords = Keyword.Keywords.empty, |
| 63867 | 45 |
val rev_abbrevs: Thy_Header.Abbrevs = Nil, |
| 55749 | 46 |
val language_context: Completion.Language_Context = Completion.Language_Context.outer, |
| 75393 | 47 |
val has_tokens: Boolean = true |
48 |
) {
|
|
| 58706 | 49 |
/** syntax content **/ |
50 |
||
| 58900 | 51 |
override def toString: String = keywords.toString |
|
56393
22f533e6a049
more abstract Prover.Syntax, as proposed by Carst Tankink;
wenzelm
parents:
56314
diff
changeset
|
52 |
|
| 58695 | 53 |
|
| 63867 | 54 |
/* keywords */ |
| 58695 | 55 |
|
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
56 |
def + (name: String, kind: String = "", load_command: String = ""): Outer_Syntax = |
|
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
57 |
new Outer_Syntax( |
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
58 |
keywords + (name, kind, load_command), rev_abbrevs, language_context, has_tokens = true) |
| 48706 | 59 |
|
| 48873 | 60 |
def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax = |
| 73359 | 61 |
keywords.foldLeft(this) {
|
| 65384 | 62 |
case (syntax, (name, spec)) => |
63 |
syntax + |
|
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
64 |
(Symbol.decode(name), spec.kind, spec.load_command) + |
|
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
65 |
(Symbol.encode(name), spec.kind, spec.load_command) |
| 63579 | 66 |
} |
67 |
||
| 63867 | 68 |
|
69 |
/* abbrevs */ |
|
70 |
||
71 |
def abbrevs: Thy_Header.Abbrevs = rev_abbrevs.reverse |
|
72 |
||
73 |
def add_abbrevs(new_abbrevs: Thy_Header.Abbrevs): Outer_Syntax = |
|
74 |
if (new_abbrevs.isEmpty) this |
|
| 63579 | 75 |
else {
|
|
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
76 |
val rev_abbrevs1 = Library.distinct(new_abbrevs) reverse_::: rev_abbrevs |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
77 |
new Outer_Syntax(keywords, rev_abbrevs1, language_context, has_tokens) |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
78 |
} |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
79 |
|
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
80 |
|
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
81 |
/* completion */ |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
82 |
|
| 75393 | 83 |
private lazy val completion: Completion = {
|
|
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
84 |
val completion_keywords = (keywords.minor.iterator ++ keywords.major.iterator).toList |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
85 |
val completion_abbrevs = |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
86 |
completion_keywords.flatMap(name => |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
87 |
(if (Keyword.theory_block.contains(keywords.kinds(name))) |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
88 |
List((name, name + "\nbegin\n\u0007\nend")) |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
89 |
else Nil) ::: |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
90 |
(if (Completion.Word_Parsers.is_word(name)) List((name, name)) else Nil)) ::: |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
91 |
abbrevs.flatMap( |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
92 |
{ case (a, b) =>
|
| 63579 | 93 |
val a1 = Symbol.decode(a) |
94 |
val a2 = Symbol.encode(a) |
|
95 |
val b1 = Symbol.decode(b) |
|
96 |
List((a1, b1), (a2, b1)) |
|
|
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
97 |
}) |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
98 |
Completion.make(completion_keywords, completion_abbrevs) |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
99 |
} |
| 34166 | 100 |
|
| 67005 | 101 |
def complete( |
102 |
history: Completion.History, |
|
103 |
unicode: Boolean, |
|
104 |
explicit: Boolean, |
|
105 |
start: Text.Offset, |
|
106 |
text: CharSequence, |
|
107 |
caret: Int, |
|
| 75393 | 108 |
context: Completion.Language_Context |
109 |
): Option[Completion.Result] = {
|
|
| 67005 | 110 |
completion.complete(history, unicode, explicit, start, text, caret, context) |
111 |
} |
|
112 |
||
| 58695 | 113 |
|
|
66717
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
114 |
/* build */ |
|
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
115 |
|
|
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
116 |
def + (header: Document.Node.Header): Outer_Syntax = |
|
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
117 |
add_keywords(header.keywords).add_abbrevs(header.abbrevs) |
| 59073 | 118 |
|
|
63584
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
wenzelm
parents:
63579
diff
changeset
|
119 |
def ++ (other: Outer_Syntax): Outer_Syntax = |
| 59073 | 120 |
if (this eq other) this |
| 66776 | 121 |
else if (this eq Outer_Syntax.empty) other |
| 59073 | 122 |
else {
|
| 63865 | 123 |
val keywords1 = keywords ++ other.keywords |
| 63867 | 124 |
val rev_abbrevs1 = Library.merge(rev_abbrevs, other.rev_abbrevs) |
|
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
125 |
if ((keywords eq keywords1) && (rev_abbrevs eq rev_abbrevs1)) this |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
126 |
else new Outer_Syntax(keywords1, rev_abbrevs1, language_context, has_tokens) |
| 59073 | 127 |
} |
128 |
||
129 |
||
| 59735 | 130 |
/* load commands */ |
| 58900 | 131 |
|
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
132 |
def load_command(name: String): Option[String] = |
| 72740 | 133 |
keywords.load_commands.get(name) |
134 |
||
135 |
def has_load_commands(text: String): Boolean = |
|
136 |
keywords.load_commands.exists({ case (cmd, _) => text.containsSlice(cmd) })
|
|
| 58900 | 137 |
|
138 |
||
| 58706 | 139 |
/* language context */ |
| 34166 | 140 |
|
| 58706 | 141 |
def set_language_context(context: Completion.Language_Context): Outer_Syntax = |
|
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
142 |
new Outer_Syntax(keywords, rev_abbrevs, context, has_tokens) |
| 58706 | 143 |
|
| 75393 | 144 |
def no_tokens: Outer_Syntax = {
|
|
73120
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
wenzelm
parents:
73115
diff
changeset
|
145 |
require(keywords.is_empty, "bad syntax keywords") |
| 58706 | 146 |
new Outer_Syntax( |
| 63867 | 147 |
rev_abbrevs = rev_abbrevs, |
| 58706 | 148 |
language_context = language_context, |
149 |
has_tokens = false) |
|
| 46969 | 150 |
} |
|
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38471
diff
changeset
|
151 |
|
| 58706 | 152 |
|
|
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38471
diff
changeset
|
153 |
|
| 58706 | 154 |
/** parsing **/ |
| 34166 | 155 |
|
| 58706 | 156 |
/* command spans */ |
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
157 |
|
| 75393 | 158 |
def parse_spans(toks: List[Token]): List[Command_Span.Span] = {
|
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
159 |
val result = new mutable.ListBuffer[Command_Span.Span] |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
160 |
val content = new mutable.ListBuffer[Token] |
|
68729
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
wenzelm
parents:
67005
diff
changeset
|
161 |
val ignored = new mutable.ListBuffer[Token] |
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
162 |
|
| 75393 | 163 |
def ship(content: List[Token]): Unit = {
|
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
164 |
val kind = |
| 72800 | 165 |
if (content.forall(_.is_ignored)) Command_Span.Ignored_Span |
166 |
else if (content.exists(_.is_error)) Command_Span.Malformed_Span |
|
|
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
167 |
else |
| 72800 | 168 |
content.find(_.is_command) match {
|
|
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
169 |
case None => Command_Span.Malformed_Span |
|
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
170 |
case Some(cmd) => |
|
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
171 |
val name = cmd.source |
|
78912
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
wenzelm
parents:
75393
diff
changeset
|
172 |
val keyword_kind = keywords.kinds.get(name) |
|
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
173 |
val offset = |
| 73359 | 174 |
content.takeWhile(_ != cmd).foldLeft(0) {
|
175 |
case (i, tok) => i + Symbol.length(tok.source) |
|
176 |
} |
|
| 64616 | 177 |
val end_offset = offset + Symbol.length(name) |
| 72800 | 178 |
val range = Text.Range(offset, end_offset) + 1 |
|
78912
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
wenzelm
parents:
75393
diff
changeset
|
179 |
Command_Span.Command_Span(keyword_kind, name, Position.Range(range)) |
|
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
180 |
} |
| 72800 | 181 |
result += Command_Span.Span(kind, content) |
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
182 |
} |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
183 |
|
| 75393 | 184 |
def flush(): Unit = {
|
| 73367 | 185 |
if (content.nonEmpty) { ship(content.toList); content.clear() }
|
186 |
if (ignored.nonEmpty) { ship(ignored.toList); ignored.clear() }
|
|
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
187 |
} |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
188 |
|
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
189 |
for (tok <- toks) {
|
|
68729
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
wenzelm
parents:
67005
diff
changeset
|
190 |
if (tok.is_ignored) ignored += tok |
| 63441 | 191 |
else if (keywords.is_before_command(tok) || |
192 |
tok.is_command && |
|
| 75393 | 193 |
(!content.exists(keywords.is_before_command) || content.exists(_.is_command))) {
|
194 |
flush(); content += tok |
|
195 |
} |
|
| 73367 | 196 |
else { content ++= ignored; ignored.clear(); content += tok }
|
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
197 |
} |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
198 |
flush() |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
199 |
|
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
200 |
result.toList |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
201 |
} |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
202 |
|
| 57906 | 203 |
def parse_spans(input: CharSequence): List[Command_Span.Span] = |
| 59083 | 204 |
parse_spans(Token.explode(keywords, input)) |
| 34166 | 205 |
} |