author | wenzelm |
Thu, 06 Jun 2024 22:26:40 +0200 | |
changeset 80274 | cff00b3dddf5 |
parent 78912 | ff4496b25197 |
child 80441 | c420429fdf4c |
permissions | -rw-r--r-- |
34166 | 1 |
/* Title: Pure/Isar/outer_syntax.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
Isabelle/Isar outer syntax. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
43411
0206466ee473
some support for partial scans with explicit context;
wenzelm
parents:
40533
diff
changeset
|
10 |
import scala.collection.mutable |
34166 | 11 |
|
12 |
||
75393 | 13 |
object Outer_Syntax { |
58706 | 14 |
/* syntax */ |
15 |
||
16 |
val empty: Outer_Syntax = new Outer_Syntax() |
|
17 |
||
73359 | 18 |
def merge(syns: List[Outer_Syntax]): Outer_Syntax = syns.foldLeft(empty)(_ ++ _) |
66717
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
19 |
|
58706 | 20 |
|
21 |
/* string literals */ |
|
22 |
||
75393 | 23 |
def quote_string(str: String): String = { |
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
24 |
val result = new StringBuilder(str.length + 10) |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
25 |
result += '"' |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
26 |
for (s <- Symbol.iterator(str)) { |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
27 |
if (s.length == 1) { |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
28 |
val c = s(0) |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
29 |
if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') { |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
30 |
result += '\\' |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
31 |
if (c < 10) result += '0' |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
32 |
if (c < 100) result += '0' |
60215 | 33 |
result ++= c.asInstanceOf[Int].toString |
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
34 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
35 |
else result += c |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
36 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
37 |
else result ++= s |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
38 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
39 |
result += '"' |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
40 |
result.toString |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
41 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
42 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
43 |
|
46712 | 44 |
final class Outer_Syntax private( |
58900 | 45 |
val keywords: Keyword.Keywords = Keyword.Keywords.empty, |
63867 | 46 |
val rev_abbrevs: Thy_Header.Abbrevs = Nil, |
55749 | 47 |
val language_context: Completion.Language_Context = Completion.Language_Context.outer, |
75393 | 48 |
val has_tokens: Boolean = true |
49 |
) { |
|
58706 | 50 |
/** syntax content **/ |
51 |
||
58900 | 52 |
override def toString: String = keywords.toString |
56393
22f533e6a049
more abstract Prover.Syntax, as proposed by Carst Tankink;
wenzelm
parents:
56314
diff
changeset
|
53 |
|
58695 | 54 |
|
63867 | 55 |
/* keywords */ |
58695 | 56 |
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
57 |
def + (name: String, kind: String = "", load_command: String = ""): Outer_Syntax = |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
58 |
new Outer_Syntax( |
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
59 |
keywords + (name, kind, load_command), rev_abbrevs, language_context, has_tokens = true) |
48706 | 60 |
|
48873 | 61 |
def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax = |
73359 | 62 |
keywords.foldLeft(this) { |
65384 | 63 |
case (syntax, (name, spec)) => |
64 |
syntax + |
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
65 |
(Symbol.decode(name), spec.kind, spec.load_command) + |
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
66 |
(Symbol.encode(name), spec.kind, spec.load_command) |
63579 | 67 |
} |
68 |
||
63867 | 69 |
|
70 |
/* abbrevs */ |
|
71 |
||
72 |
def abbrevs: Thy_Header.Abbrevs = rev_abbrevs.reverse |
|
73 |
||
74 |
def add_abbrevs(new_abbrevs: Thy_Header.Abbrevs): Outer_Syntax = |
|
75 |
if (new_abbrevs.isEmpty) this |
|
63579 | 76 |
else { |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
77 |
val rev_abbrevs1 = Library.distinct(new_abbrevs) reverse_::: rev_abbrevs |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
78 |
new Outer_Syntax(keywords, rev_abbrevs1, language_context, has_tokens) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
79 |
} |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
80 |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
81 |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
82 |
/* completion */ |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
83 |
|
75393 | 84 |
private lazy val completion: Completion = { |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
85 |
val completion_keywords = (keywords.minor.iterator ++ keywords.major.iterator).toList |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
86 |
val completion_abbrevs = |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
87 |
completion_keywords.flatMap(name => |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
88 |
(if (Keyword.theory_block.contains(keywords.kinds(name))) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
89 |
List((name, name + "\nbegin\n\u0007\nend")) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
90 |
else Nil) ::: |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
91 |
(if (Completion.Word_Parsers.is_word(name)) List((name, name)) else Nil)) ::: |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
92 |
abbrevs.flatMap( |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
93 |
{ case (a, b) => |
63579 | 94 |
val a1 = Symbol.decode(a) |
95 |
val a2 = Symbol.encode(a) |
|
96 |
val b1 = Symbol.decode(b) |
|
97 |
List((a1, b1), (a2, b1)) |
|
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
98 |
}) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
99 |
Completion.make(completion_keywords, completion_abbrevs) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
100 |
} |
34166 | 101 |
|
67005 | 102 |
def complete( |
103 |
history: Completion.History, |
|
104 |
unicode: Boolean, |
|
105 |
explicit: Boolean, |
|
106 |
start: Text.Offset, |
|
107 |
text: CharSequence, |
|
108 |
caret: Int, |
|
75393 | 109 |
context: Completion.Language_Context |
110 |
): Option[Completion.Result] = { |
|
67005 | 111 |
completion.complete(history, unicode, explicit, start, text, caret, context) |
112 |
} |
|
113 |
||
58695 | 114 |
|
66717
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
115 |
/* build */ |
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
116 |
|
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
117 |
def + (header: Document.Node.Header): Outer_Syntax = |
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
118 |
add_keywords(header.keywords).add_abbrevs(header.abbrevs) |
59073 | 119 |
|
63584
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
wenzelm
parents:
63579
diff
changeset
|
120 |
def ++ (other: Outer_Syntax): Outer_Syntax = |
59073 | 121 |
if (this eq other) this |
66776 | 122 |
else if (this eq Outer_Syntax.empty) other |
59073 | 123 |
else { |
63865 | 124 |
val keywords1 = keywords ++ other.keywords |
63867 | 125 |
val rev_abbrevs1 = Library.merge(rev_abbrevs, other.rev_abbrevs) |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
126 |
if ((keywords eq keywords1) && (rev_abbrevs eq rev_abbrevs1)) this |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
127 |
else new Outer_Syntax(keywords1, rev_abbrevs1, language_context, has_tokens) |
59073 | 128 |
} |
129 |
||
130 |
||
59735 | 131 |
/* load commands */ |
58900 | 132 |
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
133 |
def load_command(name: String): Option[String] = |
72740 | 134 |
keywords.load_commands.get(name) |
135 |
||
136 |
def has_load_commands(text: String): Boolean = |
|
137 |
keywords.load_commands.exists({ case (cmd, _) => text.containsSlice(cmd) }) |
|
58900 | 138 |
|
139 |
||
58706 | 140 |
/* language context */ |
34166 | 141 |
|
58706 | 142 |
def set_language_context(context: Completion.Language_Context): Outer_Syntax = |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
143 |
new Outer_Syntax(keywords, rev_abbrevs, context, has_tokens) |
58706 | 144 |
|
75393 | 145 |
def no_tokens: Outer_Syntax = { |
73120
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
wenzelm
parents:
73115
diff
changeset
|
146 |
require(keywords.is_empty, "bad syntax keywords") |
58706 | 147 |
new Outer_Syntax( |
63867 | 148 |
rev_abbrevs = rev_abbrevs, |
58706 | 149 |
language_context = language_context, |
150 |
has_tokens = false) |
|
46969 | 151 |
} |
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38471
diff
changeset
|
152 |
|
58706 | 153 |
|
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38471
diff
changeset
|
154 |
|
58706 | 155 |
/** parsing **/ |
34166 | 156 |
|
58706 | 157 |
/* command spans */ |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
158 |
|
75393 | 159 |
def parse_spans(toks: List[Token]): List[Command_Span.Span] = { |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
160 |
val result = new mutable.ListBuffer[Command_Span.Span] |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
161 |
val content = new mutable.ListBuffer[Token] |
68729
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
wenzelm
parents:
67005
diff
changeset
|
162 |
val ignored = new mutable.ListBuffer[Token] |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
163 |
|
75393 | 164 |
def ship(content: List[Token]): Unit = { |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
165 |
val kind = |
72800 | 166 |
if (content.forall(_.is_ignored)) Command_Span.Ignored_Span |
167 |
else if (content.exists(_.is_error)) Command_Span.Malformed_Span |
|
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
168 |
else |
72800 | 169 |
content.find(_.is_command) match { |
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
170 |
case None => Command_Span.Malformed_Span |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
171 |
case Some(cmd) => |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
172 |
val name = cmd.source |
78912
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
wenzelm
parents:
75393
diff
changeset
|
173 |
val keyword_kind = keywords.kinds.get(name) |
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
174 |
val offset = |
73359 | 175 |
content.takeWhile(_ != cmd).foldLeft(0) { |
176 |
case (i, tok) => i + Symbol.length(tok.source) |
|
177 |
} |
|
64616 | 178 |
val end_offset = offset + Symbol.length(name) |
72800 | 179 |
val range = Text.Range(offset, end_offset) + 1 |
78912
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
wenzelm
parents:
75393
diff
changeset
|
180 |
Command_Span.Command_Span(keyword_kind, name, Position.Range(range)) |
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
181 |
} |
72800 | 182 |
result += Command_Span.Span(kind, content) |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
183 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
184 |
|
75393 | 185 |
def flush(): Unit = { |
73367 | 186 |
if (content.nonEmpty) { ship(content.toList); content.clear() } |
187 |
if (ignored.nonEmpty) { ship(ignored.toList); ignored.clear() } |
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
188 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
189 |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
190 |
for (tok <- toks) { |
68729
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
wenzelm
parents:
67005
diff
changeset
|
191 |
if (tok.is_ignored) ignored += tok |
63441 | 192 |
else if (keywords.is_before_command(tok) || |
193 |
tok.is_command && |
|
75393 | 194 |
(!content.exists(keywords.is_before_command) || content.exists(_.is_command))) { |
195 |
flush(); content += tok |
|
196 |
} |
|
73367 | 197 |
else { content ++= ignored; ignored.clear(); content += tok } |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
198 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
199 |
flush() |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
200 |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
201 |
result.toList |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
202 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
203 |
|
57906 | 204 |
def parse_spans(input: CharSequence): List[Command_Span.Span] = |
59083 | 205 |
parse_spans(Token.explode(keywords, input)) |
34166 | 206 |
} |