| author | wenzelm | 
| Mon, 01 Apr 2024 15:37:55 +0200 | |
| changeset 80064 | 0d94dd2fd2d0 | 
| parent 78912 | ff4496b25197 | 
| child 80441 | c420429fdf4c | 
| permissions | -rw-r--r-- | 
| 34166 | 1 | /* Title: Pure/Isar/outer_syntax.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 4 | Isabelle/Isar outer syntax. | |
| 5 | */ | |
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 9 | ||
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 10 | import scala.collection.mutable | 
| 34166 | 11 | |
| 12 | ||
| 75393 | 13 | object Outer_Syntax {
 | 
| 58706 | 14 | /* syntax */ | 
| 15 | ||
| 16 | val empty: Outer_Syntax = new Outer_Syntax() | |
| 17 | ||
| 73359 | 18 | def merge(syns: List[Outer_Syntax]): Outer_Syntax = syns.foldLeft(empty)(_ ++ _) | 
| 66717 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 19 | |
| 58706 | 20 | |
| 21 | /* string literals */ | |
| 22 | ||
| 75393 | 23 |   def quote_string(str: String): String = {
 | 
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 24 | val result = new StringBuilder(str.length + 10) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 25 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 26 |     for (s <- Symbol.iterator(str)) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 27 |       if (s.length == 1) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 28 | val c = s(0) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 29 |         if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 30 | result += '\\' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 31 | if (c < 10) result += '0' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 32 | if (c < 100) result += '0' | 
| 60215 | 33 | result ++= c.asInstanceOf[Int].toString | 
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 34 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 35 | else result += c | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 36 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 37 | else result ++= s | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 38 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 39 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 40 | result.toString | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 41 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 42 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 43 | |
| 46712 | 44 | final class Outer_Syntax private( | 
| 58900 | 45 | val keywords: Keyword.Keywords = Keyword.Keywords.empty, | 
| 63867 | 46 | val rev_abbrevs: Thy_Header.Abbrevs = Nil, | 
| 55749 | 47 | val language_context: Completion.Language_Context = Completion.Language_Context.outer, | 
| 75393 | 48 | val has_tokens: Boolean = true | 
| 49 | ) {
 | |
| 58706 | 50 | /** syntax content **/ | 
| 51 | ||
| 58900 | 52 | override def toString: String = keywords.toString | 
| 56393 
22f533e6a049
more abstract Prover.Syntax, as proposed by Carst Tankink;
 wenzelm parents: 
56314diff
changeset | 53 | |
| 58695 | 54 | |
| 63867 | 55 | /* keywords */ | 
| 58695 | 56 | |
| 72748 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 57 | def + (name: String, kind: String = "", load_command: String = ""): Outer_Syntax = | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 58 | new Outer_Syntax( | 
| 72748 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 59 | keywords + (name, kind, load_command), rev_abbrevs, language_context, has_tokens = true) | 
| 48706 | 60 | |
| 48873 | 61 | def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax = | 
| 73359 | 62 |     keywords.foldLeft(this) {
 | 
| 65384 | 63 | case (syntax, (name, spec)) => | 
| 64 | syntax + | |
| 72748 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 65 | (Symbol.decode(name), spec.kind, spec.load_command) + | 
| 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 66 | (Symbol.encode(name), spec.kind, spec.load_command) | 
| 63579 | 67 | } | 
| 68 | ||
| 63867 | 69 | |
| 70 | /* abbrevs */ | |
| 71 | ||
| 72 | def abbrevs: Thy_Header.Abbrevs = rev_abbrevs.reverse | |
| 73 | ||
| 74 | def add_abbrevs(new_abbrevs: Thy_Header.Abbrevs): Outer_Syntax = | |
| 75 | if (new_abbrevs.isEmpty) this | |
| 63579 | 76 |     else {
 | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 77 | val rev_abbrevs1 = Library.distinct(new_abbrevs) reverse_::: rev_abbrevs | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 78 | new Outer_Syntax(keywords, rev_abbrevs1, language_context, has_tokens) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 79 | } | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 80 | |
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 81 | |
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 82 | /* completion */ | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 83 | |
| 75393 | 84 |   private lazy val completion: Completion = {
 | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 85 | val completion_keywords = (keywords.minor.iterator ++ keywords.major.iterator).toList | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 86 | val completion_abbrevs = | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 87 | completion_keywords.flatMap(name => | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 88 | (if (Keyword.theory_block.contains(keywords.kinds(name))) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 89 | List((name, name + "\nbegin\n\u0007\nend")) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 90 | else Nil) ::: | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 91 | (if (Completion.Word_Parsers.is_word(name)) List((name, name)) else Nil)) ::: | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 92 | abbrevs.flatMap( | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 93 |         { case (a, b) =>
 | 
| 63579 | 94 | val a1 = Symbol.decode(a) | 
| 95 | val a2 = Symbol.encode(a) | |
| 96 | val b1 = Symbol.decode(b) | |
| 97 | List((a1, b1), (a2, b1)) | |
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 98 | }) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 99 | Completion.make(completion_keywords, completion_abbrevs) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 100 | } | 
| 34166 | 101 | |
| 67005 | 102 | def complete( | 
| 103 | history: Completion.History, | |
| 104 | unicode: Boolean, | |
| 105 | explicit: Boolean, | |
| 106 | start: Text.Offset, | |
| 107 | text: CharSequence, | |
| 108 | caret: Int, | |
| 75393 | 109 | context: Completion.Language_Context | 
| 110 |   ): Option[Completion.Result] = {
 | |
| 67005 | 111 | completion.complete(history, unicode, explicit, start, text, caret, context) | 
| 112 | } | |
| 113 | ||
| 58695 | 114 | |
| 66717 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 115 | /* build */ | 
| 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 116 | |
| 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 117 | def + (header: Document.Node.Header): Outer_Syntax = | 
| 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 118 | add_keywords(header.keywords).add_abbrevs(header.abbrevs) | 
| 59073 | 119 | |
| 63584 
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
 wenzelm parents: 
63579diff
changeset | 120 | def ++ (other: Outer_Syntax): Outer_Syntax = | 
| 59073 | 121 | if (this eq other) this | 
| 66776 | 122 | else if (this eq Outer_Syntax.empty) other | 
| 59073 | 123 |     else {
 | 
| 63865 | 124 | val keywords1 = keywords ++ other.keywords | 
| 63867 | 125 | val rev_abbrevs1 = Library.merge(rev_abbrevs, other.rev_abbrevs) | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 126 | if ((keywords eq keywords1) && (rev_abbrevs eq rev_abbrevs1)) this | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 127 | else new Outer_Syntax(keywords1, rev_abbrevs1, language_context, has_tokens) | 
| 59073 | 128 | } | 
| 129 | ||
| 130 | ||
| 59735 | 131 | /* load commands */ | 
| 58900 | 132 | |
| 72748 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 133 | def load_command(name: String): Option[String] = | 
| 72740 | 134 | keywords.load_commands.get(name) | 
| 135 | ||
| 136 | def has_load_commands(text: String): Boolean = | |
| 137 |     keywords.load_commands.exists({ case (cmd, _) => text.containsSlice(cmd) })
 | |
| 58900 | 138 | |
| 139 | ||
| 58706 | 140 | /* language context */ | 
| 34166 | 141 | |
| 58706 | 142 | def set_language_context(context: Completion.Language_Context): Outer_Syntax = | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 143 | new Outer_Syntax(keywords, rev_abbrevs, context, has_tokens) | 
| 58706 | 144 | |
| 75393 | 145 |   def no_tokens: Outer_Syntax = {
 | 
| 73120 
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
 wenzelm parents: 
73115diff
changeset | 146 | require(keywords.is_empty, "bad syntax keywords") | 
| 58706 | 147 | new Outer_Syntax( | 
| 63867 | 148 | rev_abbrevs = rev_abbrevs, | 
| 58706 | 149 | language_context = language_context, | 
| 150 | has_tokens = false) | |
| 46969 | 151 | } | 
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 152 | |
| 58706 | 153 | |
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 154 | |
| 58706 | 155 | /** parsing **/ | 
| 34166 | 156 | |
| 58706 | 157 | /* command spans */ | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 158 | |
| 75393 | 159 |   def parse_spans(toks: List[Token]): List[Command_Span.Span] = {
 | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 160 | val result = new mutable.ListBuffer[Command_Span.Span] | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 161 | val content = new mutable.ListBuffer[Token] | 
| 68729 
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
 wenzelm parents: 
67005diff
changeset | 162 | val ignored = new mutable.ListBuffer[Token] | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 163 | |
| 75393 | 164 |     def ship(content: List[Token]): Unit = {
 | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 165 | val kind = | 
| 72800 | 166 | if (content.forall(_.is_ignored)) Command_Span.Ignored_Span | 
| 167 | else if (content.exists(_.is_error)) Command_Span.Malformed_Span | |
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 168 | else | 
| 72800 | 169 |           content.find(_.is_command) match {
 | 
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 170 | case None => Command_Span.Malformed_Span | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 171 | case Some(cmd) => | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 172 | val name = cmd.source | 
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
75393diff
changeset | 173 | val keyword_kind = keywords.kinds.get(name) | 
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 174 | val offset = | 
| 73359 | 175 |                 content.takeWhile(_ != cmd).foldLeft(0) {
 | 
| 176 | case (i, tok) => i + Symbol.length(tok.source) | |
| 177 | } | |
| 64616 | 178 | val end_offset = offset + Symbol.length(name) | 
| 72800 | 179 | val range = Text.Range(offset, end_offset) + 1 | 
| 78912 
ff4496b25197
clarified signature: Command_Span.Kind already contains keyword_kind, so parsing document structure no longer requires Keyword.Keywords;
 wenzelm parents: 
75393diff
changeset | 180 | Command_Span.Command_Span(keyword_kind, name, Position.Range(range)) | 
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 181 | } | 
| 72800 | 182 | result += Command_Span.Span(kind, content) | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 183 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 184 | |
| 75393 | 185 |     def flush(): Unit = {
 | 
| 73367 | 186 |       if (content.nonEmpty) { ship(content.toList); content.clear() }
 | 
| 187 |       if (ignored.nonEmpty) { ship(ignored.toList); ignored.clear() }
 | |
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 188 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 189 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 190 |     for (tok <- toks) {
 | 
| 68729 
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
 wenzelm parents: 
67005diff
changeset | 191 | if (tok.is_ignored) ignored += tok | 
| 63441 | 192 | else if (keywords.is_before_command(tok) || | 
| 193 | tok.is_command && | |
| 75393 | 194 |           (!content.exists(keywords.is_before_command) || content.exists(_.is_command))) {
 | 
| 195 | flush(); content += tok | |
| 196 | } | |
| 73367 | 197 |       else { content ++= ignored; ignored.clear(); content += tok }
 | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 198 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 199 | flush() | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 200 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 201 | result.toList | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 202 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 203 | |
| 57906 | 204 | def parse_spans(input: CharSequence): List[Command_Span.Span] = | 
| 59083 | 205 | parse_spans(Token.explode(keywords, input)) | 
| 34166 | 206 | } |