| author | wenzelm | 
| Tue, 22 Oct 2024 19:26:40 +0200 | |
| changeset 81232 | 9867b5cf3f7a | 
| parent 80487 | e25c6d4c219c | 
| permissions | -rw-r--r-- | 
| 34166 | 1  | 
/* Title: Pure/Isar/outer_syntax.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
4  | 
Isabelle/Isar outer syntax.  | 
|
5  | 
*/  | 
|
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
9  | 
||
| 
43411
 
0206466ee473
some support for partial scans with explicit context;
 
wenzelm 
parents: 
40533 
diff
changeset
 | 
10  | 
import scala.collection.mutable  | 
| 34166 | 11  | 
|
12  | 
||
| 75393 | 13  | 
object Outer_Syntax {
 | 
| 58706 | 14  | 
/* syntax */  | 
15  | 
||
16  | 
val empty: Outer_Syntax = new Outer_Syntax()  | 
|
17  | 
||
| 73359 | 18  | 
def merge(syns: List[Outer_Syntax]): Outer_Syntax = syns.foldLeft(empty)(_ ++ _)  | 
| 
66717
 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 
wenzelm 
parents: 
65384 
diff
changeset
 | 
19  | 
|
| 58706 | 20  | 
|
21  | 
/* string literals */  | 
|
22  | 
||
| 80441 | 23  | 
def quote_string(str: String): String =  | 
24  | 
    Library.string_builder(hint = str.length + 10) { result =>
 | 
|
25  | 
result += '"'  | 
|
26  | 
      for (s <- Symbol.iterator(str)) {
 | 
|
27  | 
        if (s.length == 1) {
 | 
|
28  | 
val c = s(0)  | 
|
| 80487 | 29  | 
          if (c < 32 && c != YXML.X_char && c != YXML.Y_char || c == '\\' || c == '"') {
 | 
| 80441 | 30  | 
result += '\\'  | 
31  | 
if (c < 10) result += '0'  | 
|
32  | 
if (c < 100) result += '0'  | 
|
33  | 
result ++= c.asInstanceOf[Int].toString  | 
|
34  | 
}  | 
|
35  | 
else result += c  | 
|
| 
43774
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
36  | 
}  | 
| 80441 | 37  | 
else result ++= s  | 
| 
43774
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
38  | 
}  | 
| 80441 | 39  | 
result += '"'  | 
| 
43774
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
40  | 
}  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
41  | 
}  | 
| 
 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 
wenzelm 
parents: 
43695 
diff
changeset
 | 
42  | 
|
| 46712 | 43  | 
final class Outer_Syntax private(  | 
| 58900 | 44  | 
val keywords: Keyword.Keywords = Keyword.Keywords.empty,  | 
| 63867 | 45  | 
val rev_abbrevs: Thy_Header.Abbrevs = Nil,  | 
| 55749 | 46  | 
val language_context: Completion.Language_Context = Completion.Language_Context.outer,  | 
| 75393 | 47  | 
val has_tokens: Boolean = true  | 
48  | 
) {
 | 
|
| 58706 | 49  | 
/** syntax content **/  | 
50  | 
||
| 58900 | 51  | 
override def toString: String = keywords.toString  | 
| 
56393
 
22f533e6a049
more abstract Prover.Syntax, as proposed by Carst Tankink;
 
wenzelm 
parents: 
56314 
diff
changeset
 | 
52  | 
|
| 58695 | 53  | 
|
| 63867 | 54  | 
/* keywords */  | 
| 58695 | 55  | 
|
| 
72748
 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 
wenzelm 
parents: 
72740 
diff
changeset
 | 
56  | 
def + (name: String, kind: String = "", load_command: String = ""): Outer_Syntax =  | 
| 
67004
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
57  | 
new Outer_Syntax(  | 
| 
72748
 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 
wenzelm 
parents: 
72740 
diff
changeset
 | 
58  | 
keywords + (name, kind, load_command), rev_abbrevs, language_context, has_tokens = true)  | 
| 48706 | 59  | 
|
| 48873 | 60  | 
def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax =  | 
| 73359 | 61  | 
    keywords.foldLeft(this) {
 | 
| 65384 | 62  | 
case (syntax, (name, spec)) =>  | 
63  | 
syntax +  | 
|
| 
72748
 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 
wenzelm 
parents: 
72740 
diff
changeset
 | 
64  | 
(Symbol.decode(name), spec.kind, spec.load_command) +  | 
| 
 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 
wenzelm 
parents: 
72740 
diff
changeset
 | 
65  | 
(Symbol.encode(name), spec.kind, spec.load_command)  | 
| 63579 | 66  | 
}  | 
67  | 
||
| 63867 | 68  | 
|
69  | 
/* abbrevs */  | 
|
70  | 
||
71  | 
def abbrevs: Thy_Header.Abbrevs = rev_abbrevs.reverse  | 
|
72  | 
||
73  | 
def add_abbrevs(new_abbrevs: Thy_Header.Abbrevs): Outer_Syntax =  | 
|
74  | 
if (new_abbrevs.isEmpty) this  | 
|
| 63579 | 75  | 
    else {
 | 
| 
67004
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
76  | 
val rev_abbrevs1 = Library.distinct(new_abbrevs) reverse_::: rev_abbrevs  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
77  | 
new Outer_Syntax(keywords, rev_abbrevs1, language_context, has_tokens)  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
78  | 
}  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
79  | 
|
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
80  | 
|
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
81  | 
/* completion */  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
82  | 
|
| 75393 | 83  | 
  private lazy val completion: Completion = {
 | 
| 
67004
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
84  | 
val completion_keywords = (keywords.minor.iterator ++ keywords.major.iterator).toList  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
85  | 
val completion_abbrevs =  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
86  | 
completion_keywords.flatMap(name =>  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
87  | 
(if (Keyword.theory_block.contains(keywords.kinds(name)))  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
88  | 
List((name, name + "\nbegin\n\u0007\nend"))  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
89  | 
else Nil) :::  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
90  | 
(if (Completion.Word_Parsers.is_word(name)) List((name, name)) else Nil)) :::  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
91  | 
abbrevs.flatMap(  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
92  | 
        { case (a, b) =>
 | 
| 63579 | 93  | 
val a1 = Symbol.decode(a)  | 
94  | 
val a2 = Symbol.encode(a)  | 
|
95  | 
val b1 = Symbol.decode(b)  | 
|
96  | 
List((a1, b1), (a2, b1))  | 
|
| 
67004
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
97  | 
})  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
98  | 
Completion.make(completion_keywords, completion_abbrevs)  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
99  | 
}  | 
| 34166 | 100  | 
|
| 67005 | 101  | 
def complete(  | 
102  | 
history: Completion.History,  | 
|
103  | 
unicode: Boolean,  | 
|
104  | 
explicit: Boolean,  | 
|
105  | 
start: Text.Offset,  | 
|
106  | 
text: CharSequence,  | 
|
107  | 
caret: Int,  | 
|
| 75393 | 108  | 
context: Completion.Language_Context  | 
109  | 
  ): Option[Completion.Result] = {
 | 
|
| 67005 | 110  | 
completion.complete(history, unicode, explicit, start, text, caret, context)  | 
111  | 
}  | 
|
112  | 
||
| 58695 | 113  | 
|
| 
66717
 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 
wenzelm 
parents: 
65384 
diff
changeset
 | 
114  | 
/* build */  | 
| 
 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 
wenzelm 
parents: 
65384 
diff
changeset
 | 
115  | 
|
| 
 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 
wenzelm 
parents: 
65384 
diff
changeset
 | 
116  | 
def + (header: Document.Node.Header): Outer_Syntax =  | 
| 
 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 
wenzelm 
parents: 
65384 
diff
changeset
 | 
117  | 
add_keywords(header.keywords).add_abbrevs(header.abbrevs)  | 
| 59073 | 118  | 
|
| 
63584
 
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
 
wenzelm 
parents: 
63579 
diff
changeset
 | 
119  | 
def ++ (other: Outer_Syntax): Outer_Syntax =  | 
| 59073 | 120  | 
if (this eq other) this  | 
| 66776 | 121  | 
else if (this eq Outer_Syntax.empty) other  | 
| 59073 | 122  | 
    else {
 | 
| 63865 | 123  | 
val keywords1 = keywords ++ other.keywords  | 
| 63867 | 124  | 
val rev_abbrevs1 = Library.merge(rev_abbrevs, other.rev_abbrevs)  | 
| 
67004
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
125  | 
if ((keywords eq keywords1) && (rev_abbrevs eq rev_abbrevs1)) this  | 
| 
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
126  | 
else new Outer_Syntax(keywords1, rev_abbrevs1, language_context, has_tokens)  | 
| 59073 | 127  | 
}  | 
128  | 
||
129  | 
||
| 59735 | 130  | 
/* load commands */  | 
| 58900 | 131  | 
|
| 
72748
 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 
wenzelm 
parents: 
72740 
diff
changeset
 | 
132  | 
def load_command(name: String): Option[String] =  | 
| 72740 | 133  | 
keywords.load_commands.get(name)  | 
134  | 
||
135  | 
def has_load_commands(text: String): Boolean =  | 
|
136  | 
    keywords.load_commands.exists({ case (cmd, _) => text.containsSlice(cmd) })
 | 
|
| 58900 | 137  | 
|
138  | 
||
| 58706 | 139  | 
/* language context */  | 
| 34166 | 140  | 
|
| 58706 | 141  | 
def set_language_context(context: Completion.Language_Context): Outer_Syntax =  | 
| 
67004
 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 
wenzelm 
parents: 
66984 
diff
changeset
 | 
142  | 
new Outer_Syntax(keywords, rev_abbrevs, context, has_tokens)  | 
| 58706 | 143  | 
|
| 75393 | 144  | 
  def no_tokens: Outer_Syntax = {
 | 
| 
73120
 
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
 
wenzelm 
parents: 
73115 
diff
changeset
 | 
145  | 
require(keywords.is_empty, "bad syntax keywords")  | 
| 58706 | 146  | 
new Outer_Syntax(  | 
| 63867 | 147  | 
rev_abbrevs = rev_abbrevs,  | 
| 58706 | 148  | 
language_context = language_context,  | 
149  | 
has_tokens = false)  | 
|
| 46969 | 150  | 
}  | 
| 
40454
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
151  | 
|
| 58706 | 152  | 
|
| 
40454
 
2516ea25a54b
some support for nested source structure, based on section headings;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
153  | 
|
| 58706 | 154  | 
/** parsing **/  | 
| 34166 | 155  | 
|
| 58706 | 156  | 
/* command spans */  | 
| 
57905
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
157  | 
|
| 75393 | 158  | 
  def parse_spans(toks: List[Token]): List[Command_Span.Span] = {
 | 
| 
57905
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
159  | 
val result = new mutable.ListBuffer[Command_Span.Span]  | 
| 
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
160  | 
val content = new mutable.ListBuffer[Token]  | 
| 
68729
 
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
 
wenzelm 
parents: 
67005 
diff
changeset
 | 
161  | 
val ignored = new mutable.ListBuffer[Token]  | 
| 
57905
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
162  | 
|
| 75393 | 163  | 
    def ship(content: List[Token]): Unit = {
 | 
| 
57905
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
164  | 
val kind =  | 
| 72800 | 165  | 
if (content.forall(_.is_ignored)) Command_Span.Ignored_Span  | 
166  | 
else if (content.exists(_.is_error)) Command_Span.Malformed_Span  | 
|
| 
59924
 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 
wenzelm 
parents: 
59735 
diff
changeset
 | 
167  | 
else  | 
| 72800 | 168  | 
          content.find(_.is_command) match {
 | 
| 
59924
 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 
wenzelm 
parents: 
59735 
diff
changeset
 | 
169  | 
case None => Command_Span.Malformed_Span  | 
| 
 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 
wenzelm 
parents: 
59735 
diff
changeset
 | 
170  | 
case Some(cmd) =>  | 
| 
 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 
wenzelm 
parents: 
59735 
diff
changeset
 | 
171  | 
val name = cmd.source  | 
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
75393 
diff
changeset
 | 
172  | 
val keyword_kind = keywords.kinds.get(name)  | 
| 
59924
 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 
wenzelm 
parents: 
59735 
diff
changeset
 | 
173  | 
val offset =  | 
| 73359 | 174  | 
                content.takeWhile(_ != cmd).foldLeft(0) {
 | 
175  | 
case (i, tok) => i + Symbol.length(tok.source)  | 
|
176  | 
}  | 
|
| 64616 | 177  | 
val end_offset = offset + Symbol.length(name)  | 
| 72800 | 178  | 
val range = Text.Range(offset, end_offset) + 1  | 
| 
78912
 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 
wenzelm 
parents: 
75393 
diff
changeset
 | 
179  | 
Command_Span.Command_Span(keyword_kind, name, Position.Range(range))  | 
| 
59924
 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 
wenzelm 
parents: 
59735 
diff
changeset
 | 
180  | 
}  | 
| 72800 | 181  | 
result += Command_Span.Span(kind, content)  | 
| 
57905
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
182  | 
}  | 
| 
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
183  | 
|
| 75393 | 184  | 
    def flush(): Unit = {
 | 
| 73367 | 185  | 
      if (content.nonEmpty) { ship(content.toList); content.clear() }
 | 
186  | 
      if (ignored.nonEmpty) { ship(ignored.toList); ignored.clear() }
 | 
|
| 
57905
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
187  | 
}  | 
| 
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
188  | 
|
| 
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
189  | 
    for (tok <- toks) {
 | 
| 
68729
 
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
 
wenzelm 
parents: 
67005 
diff
changeset
 | 
190  | 
if (tok.is_ignored) ignored += tok  | 
| 63441 | 191  | 
else if (keywords.is_before_command(tok) ||  | 
192  | 
tok.is_command &&  | 
|
| 75393 | 193  | 
          (!content.exists(keywords.is_before_command) || content.exists(_.is_command))) {
 | 
194  | 
flush(); content += tok  | 
|
195  | 
}  | 
|
| 73367 | 196  | 
      else { content ++= ignored; ignored.clear(); content += tok }
 | 
| 
57905
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
197  | 
}  | 
| 
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
198  | 
flush()  | 
| 
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
199  | 
|
| 
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
200  | 
result.toList  | 
| 
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
201  | 
}  | 
| 
 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 
wenzelm 
parents: 
57901 
diff
changeset
 | 
202  | 
|
| 57906 | 203  | 
def parse_spans(input: CharSequence): List[Command_Span.Span] =  | 
| 59083 | 204  | 
parse_spans(Token.explode(keywords, input))  | 
| 34166 | 205  | 
}  |