author | Fabian Huch <huch@in.tum.de> |
Thu, 18 Jul 2024 13:08:11 +0200 | |
changeset 80574 | 90493e889dff |
parent 80487 | e25c6d4c219c |
permissions | -rw-r--r-- |
34166 | 1 |
/* Title: Pure/Isar/outer_syntax.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
Isabelle/Isar outer syntax. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
43411
0206466ee473
some support for partial scans with explicit context;
wenzelm
parents:
40533
diff
changeset
|
10 |
import scala.collection.mutable |
34166 | 11 |
|
12 |
||
75393 | 13 |
object Outer_Syntax { |
58706 | 14 |
/* syntax */ |
15 |
||
16 |
val empty: Outer_Syntax = new Outer_Syntax() |
|
17 |
||
73359 | 18 |
def merge(syns: List[Outer_Syntax]): Outer_Syntax = syns.foldLeft(empty)(_ ++ _) |
66717
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
19 |
|
58706 | 20 |
|
21 |
/* string literals */ |
|
22 |
||
80441 | 23 |
def quote_string(str: String): String = |
24 |
Library.string_builder(hint = str.length + 10) { result => |
|
25 |
result += '"' |
|
26 |
for (s <- Symbol.iterator(str)) { |
|
27 |
if (s.length == 1) { |
|
28 |
val c = s(0) |
|
80487 | 29 |
if (c < 32 && c != YXML.X_char && c != YXML.Y_char || c == '\\' || c == '"') { |
80441 | 30 |
result += '\\' |
31 |
if (c < 10) result += '0' |
|
32 |
if (c < 100) result += '0' |
|
33 |
result ++= c.asInstanceOf[Int].toString |
|
34 |
} |
|
35 |
else result += c |
|
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
36 |
} |
80441 | 37 |
else result ++= s |
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
38 |
} |
80441 | 39 |
result += '"' |
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
40 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
41 |
} |
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
42 |
|
46712 | 43 |
final class Outer_Syntax private( |
58900 | 44 |
val keywords: Keyword.Keywords = Keyword.Keywords.empty, |
63867 | 45 |
val rev_abbrevs: Thy_Header.Abbrevs = Nil, |
55749 | 46 |
val language_context: Completion.Language_Context = Completion.Language_Context.outer, |
75393 | 47 |
val has_tokens: Boolean = true |
48 |
) { |
|
58706 | 49 |
/** syntax content **/ |
50 |
||
58900 | 51 |
override def toString: String = keywords.toString |
56393
22f533e6a049
more abstract Prover.Syntax, as proposed by Carst Tankink;
wenzelm
parents:
56314
diff
changeset
|
52 |
|
58695 | 53 |
|
63867 | 54 |
/* keywords */ |
58695 | 55 |
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
56 |
def + (name: String, kind: String = "", load_command: String = ""): Outer_Syntax = |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
57 |
new Outer_Syntax( |
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
58 |
keywords + (name, kind, load_command), rev_abbrevs, language_context, has_tokens = true) |
48706 | 59 |
|
48873 | 60 |
def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax = |
73359 | 61 |
keywords.foldLeft(this) { |
65384 | 62 |
case (syntax, (name, spec)) => |
63 |
syntax + |
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
64 |
(Symbol.decode(name), spec.kind, spec.load_command) + |
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
65 |
(Symbol.encode(name), spec.kind, spec.load_command) |
63579 | 66 |
} |
67 |
||
63867 | 68 |
|
69 |
/* abbrevs */ |
|
70 |
||
71 |
def abbrevs: Thy_Header.Abbrevs = rev_abbrevs.reverse |
|
72 |
||
73 |
def add_abbrevs(new_abbrevs: Thy_Header.Abbrevs): Outer_Syntax = |
|
74 |
if (new_abbrevs.isEmpty) this |
|
63579 | 75 |
else { |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
76 |
val rev_abbrevs1 = Library.distinct(new_abbrevs) reverse_::: rev_abbrevs |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
77 |
new Outer_Syntax(keywords, rev_abbrevs1, language_context, has_tokens) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
78 |
} |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
79 |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
80 |
|
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
81 |
/* completion */ |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
82 |
|
75393 | 83 |
private lazy val completion: Completion = { |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
84 |
val completion_keywords = (keywords.minor.iterator ++ keywords.major.iterator).toList |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
85 |
val completion_abbrevs = |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
86 |
completion_keywords.flatMap(name => |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
87 |
(if (Keyword.theory_block.contains(keywords.kinds(name))) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
88 |
List((name, name + "\nbegin\n\u0007\nend")) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
89 |
else Nil) ::: |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
90 |
(if (Completion.Word_Parsers.is_word(name)) List((name, name)) else Nil)) ::: |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
91 |
abbrevs.flatMap( |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
92 |
{ case (a, b) => |
63579 | 93 |
val a1 = Symbol.decode(a) |
94 |
val a2 = Symbol.encode(a) |
|
95 |
val b1 = Symbol.decode(b) |
|
96 |
List((a1, b1), (a2, b1)) |
|
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
97 |
}) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
98 |
Completion.make(completion_keywords, completion_abbrevs) |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
99 |
} |
34166 | 100 |
|
67005 | 101 |
def complete( |
102 |
history: Completion.History, |
|
103 |
unicode: Boolean, |
|
104 |
explicit: Boolean, |
|
105 |
start: Text.Offset, |
|
106 |
text: CharSequence, |
|
107 |
caret: Int, |
|
75393 | 108 |
context: Completion.Language_Context |
109 |
): Option[Completion.Result] = { |
|
67005 | 110 |
completion.complete(history, unicode, explicit, start, text, caret, context) |
111 |
} |
|
112 |
||
58695 | 113 |
|
66717
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
114 |
/* build */ |
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
115 |
|
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
116 |
def + (header: Document.Node.Header): Outer_Syntax = |
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
wenzelm
parents:
65384
diff
changeset
|
117 |
add_keywords(header.keywords).add_abbrevs(header.abbrevs) |
59073 | 118 |
|
63584
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
wenzelm
parents:
63579
diff
changeset
|
119 |
def ++ (other: Outer_Syntax): Outer_Syntax = |
59073 | 120 |
if (this eq other) this |
66776 | 121 |
else if (this eq Outer_Syntax.empty) other |
59073 | 122 |
else { |
63865 | 123 |
val keywords1 = keywords ++ other.keywords |
63867 | 124 |
val rev_abbrevs1 = Library.merge(rev_abbrevs, other.rev_abbrevs) |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
125 |
if ((keywords eq keywords1) && (rev_abbrevs eq rev_abbrevs1)) this |
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
126 |
else new Outer_Syntax(keywords1, rev_abbrevs1, language_context, has_tokens) |
59073 | 127 |
} |
128 |
||
129 |
||
59735 | 130 |
/* load commands */ |
58900 | 131 |
|
72748
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
wenzelm
parents:
72740
diff
changeset
|
132 |
def load_command(name: String): Option[String] = |
72740 | 133 |
keywords.load_commands.get(name) |
134 |
||
135 |
def has_load_commands(text: String): Boolean = |
|
136 |
keywords.load_commands.exists({ case (cmd, _) => text.containsSlice(cmd) }) |
|
58900 | 137 |
|
138 |
||
58706 | 139 |
/* language context */ |
34166 | 140 |
|
58706 | 141 |
def set_language_context(context: Completion.Language_Context): Outer_Syntax = |
67004
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
wenzelm
parents:
66984
diff
changeset
|
142 |
new Outer_Syntax(keywords, rev_abbrevs, context, has_tokens) |
58706 | 143 |
|
75393 | 144 |
def no_tokens: Outer_Syntax = { |
73120
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
wenzelm
parents:
73115
diff
changeset
|
145 |
require(keywords.is_empty, "bad syntax keywords") |
58706 | 146 |
new Outer_Syntax( |
63867 | 147 |
rev_abbrevs = rev_abbrevs, |
58706 | 148 |
language_context = language_context, |
149 |
has_tokens = false) |
|
46969 | 150 |
} |
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38471
diff
changeset
|
151 |
|
58706 | 152 |
|
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38471
diff
changeset
|
153 |
|
58706 | 154 |
/** parsing **/ |
34166 | 155 |
|
58706 | 156 |
/* command spans */ |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
157 |
|
75393 | 158 |
def parse_spans(toks: List[Token]): List[Command_Span.Span] = { |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
159 |
val result = new mutable.ListBuffer[Command_Span.Span] |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
160 |
val content = new mutable.ListBuffer[Token] |
68729
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
wenzelm
parents:
67005
diff
changeset
|
161 |
val ignored = new mutable.ListBuffer[Token] |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
162 |
|
75393 | 163 |
def ship(content: List[Token]): Unit = { |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
164 |
val kind = |
72800 | 165 |
if (content.forall(_.is_ignored)) Command_Span.Ignored_Span |
166 |
else if (content.exists(_.is_error)) Command_Span.Malformed_Span |
|
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
167 |
else |
72800 | 168 |
content.find(_.is_command) match { |
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
169 |
case None => Command_Span.Malformed_Span |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
170 |
case Some(cmd) => |
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
171 |
val name = cmd.source |
78912
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
wenzelm
parents:
75393
diff
changeset
|
172 |
val keyword_kind = keywords.kinds.get(name) |
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
173 |
val offset = |
73359 | 174 |
content.takeWhile(_ != cmd).foldLeft(0) { |
175 |
case (i, tok) => i + Symbol.length(tok.source) |
|
176 |
} |
|
64616 | 177 |
val end_offset = offset + Symbol.length(name) |
72800 | 178 |
val range = Text.Range(offset, end_offset) + 1 |
78912
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
wenzelm
parents:
75393
diff
changeset
|
179 |
Command_Span.Command_Span(keyword_kind, name, Position.Range(range)) |
59924
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
wenzelm
parents:
59735
diff
changeset
|
180 |
} |
72800 | 181 |
result += Command_Span.Span(kind, content) |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
182 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
183 |
|
75393 | 184 |
def flush(): Unit = { |
73367 | 185 |
if (content.nonEmpty) { ship(content.toList); content.clear() } |
186 |
if (ignored.nonEmpty) { ship(ignored.toList); ignored.clear() } |
|
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
187 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
188 |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
189 |
for (tok <- toks) { |
68729
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
wenzelm
parents:
67005
diff
changeset
|
190 |
if (tok.is_ignored) ignored += tok |
63441 | 191 |
else if (keywords.is_before_command(tok) || |
192 |
tok.is_command && |
|
75393 | 193 |
(!content.exists(keywords.is_before_command) || content.exists(_.is_command))) { |
194 |
flush(); content += tok |
|
195 |
} |
|
73367 | 196 |
else { content ++= ignored; ignored.clear(); content += tok } |
57905
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
197 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
198 |
flush() |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
199 |
|
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
200 |
result.toList |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
201 |
} |
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
wenzelm
parents:
57901
diff
changeset
|
202 |
|
57906 | 203 |
def parse_spans(input: CharSequence): List[Command_Span.Span] = |
59083 | 204 |
parse_spans(Token.explode(keywords, input)) |
34166 | 205 |
} |