| author | wenzelm | 
| Tue, 23 Nov 2021 21:02:13 +0100 | |
| changeset 74836 | a97ec0954c50 | 
| parent 73367 | 77ef8bef0593 | 
| child 75393 | 87ebf5a50283 | 
| permissions | -rw-r--r-- | 
| 34166 | 1 | /* Title: Pure/Isar/outer_syntax.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 4 | Isabelle/Isar outer syntax. | |
| 5 | */ | |
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 9 | ||
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 10 | import scala.collection.mutable | 
| 34166 | 11 | |
| 12 | ||
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 13 | object Outer_Syntax | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 14 | {
 | 
| 58706 | 15 | /* syntax */ | 
| 16 | ||
| 17 | val empty: Outer_Syntax = new Outer_Syntax() | |
| 18 | ||
| 73359 | 19 | def merge(syns: List[Outer_Syntax]): Outer_Syntax = syns.foldLeft(empty)(_ ++ _) | 
| 66717 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 20 | |
| 58706 | 21 | |
| 22 | /* string literals */ | |
| 23 | ||
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 24 | def quote_string(str: String): String = | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 25 |   {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 26 | val result = new StringBuilder(str.length + 10) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 27 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 28 |     for (s <- Symbol.iterator(str)) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 29 |       if (s.length == 1) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 30 | val c = s(0) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 31 |         if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 32 | result += '\\' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 33 | if (c < 10) result += '0' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 34 | if (c < 100) result += '0' | 
| 60215 | 35 | result ++= c.asInstanceOf[Int].toString | 
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 36 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 37 | else result += c | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 38 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 39 | else result ++= s | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 40 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 41 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 42 | result.toString | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 43 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 44 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 45 | |
| 46712 | 46 | final class Outer_Syntax private( | 
| 58900 | 47 | val keywords: Keyword.Keywords = Keyword.Keywords.empty, | 
| 63867 | 48 | val rev_abbrevs: Thy_Header.Abbrevs = Nil, | 
| 55749 | 49 | val language_context: Completion.Language_Context = Completion.Language_Context.outer, | 
| 63584 
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
 wenzelm parents: 
63579diff
changeset | 50 | val has_tokens: Boolean = true) | 
| 34166 | 51 | {
 | 
| 58706 | 52 | /** syntax content **/ | 
| 53 | ||
| 58900 | 54 | override def toString: String = keywords.toString | 
| 56393 
22f533e6a049
more abstract Prover.Syntax, as proposed by Carst Tankink;
 wenzelm parents: 
56314diff
changeset | 55 | |
| 58695 | 56 | |
| 63867 | 57 | /* keywords */ | 
| 58695 | 58 | |
| 72748 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 59 | def + (name: String, kind: String = "", load_command: String = ""): Outer_Syntax = | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 60 | new Outer_Syntax( | 
| 72748 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 61 | keywords + (name, kind, load_command), rev_abbrevs, language_context, has_tokens = true) | 
| 48706 | 62 | |
| 48873 | 63 | def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax = | 
| 73359 | 64 |     keywords.foldLeft(this) {
 | 
| 65384 | 65 | case (syntax, (name, spec)) => | 
| 66 | syntax + | |
| 72748 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 67 | (Symbol.decode(name), spec.kind, spec.load_command) + | 
| 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 68 | (Symbol.encode(name), spec.kind, spec.load_command) | 
| 63579 | 69 | } | 
| 70 | ||
| 63867 | 71 | |
| 72 | /* abbrevs */ | |
| 73 | ||
| 74 | def abbrevs: Thy_Header.Abbrevs = rev_abbrevs.reverse | |
| 75 | ||
| 76 | def add_abbrevs(new_abbrevs: Thy_Header.Abbrevs): Outer_Syntax = | |
| 77 | if (new_abbrevs.isEmpty) this | |
| 63579 | 78 |     else {
 | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 79 | val rev_abbrevs1 = Library.distinct(new_abbrevs) reverse_::: rev_abbrevs | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 80 | new Outer_Syntax(keywords, rev_abbrevs1, language_context, has_tokens) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 81 | } | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 82 | |
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 83 | |
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 84 | /* completion */ | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 85 | |
| 67005 | 86 | private lazy val completion: Completion = | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 87 |   {
 | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 88 | val completion_keywords = (keywords.minor.iterator ++ keywords.major.iterator).toList | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 89 | val completion_abbrevs = | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 90 | completion_keywords.flatMap(name => | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 91 | (if (Keyword.theory_block.contains(keywords.kinds(name))) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 92 | List((name, name + "\nbegin\n\u0007\nend")) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 93 | else Nil) ::: | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 94 | (if (Completion.Word_Parsers.is_word(name)) List((name, name)) else Nil)) ::: | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 95 | abbrevs.flatMap( | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 96 |         { case (a, b) =>
 | 
| 63579 | 97 | val a1 = Symbol.decode(a) | 
| 98 | val a2 = Symbol.encode(a) | |
| 99 | val b1 = Symbol.decode(b) | |
| 100 | List((a1, b1), (a2, b1)) | |
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 101 | }) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 102 | Completion.make(completion_keywords, completion_abbrevs) | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 103 | } | 
| 34166 | 104 | |
| 67005 | 105 | def complete( | 
| 106 | history: Completion.History, | |
| 107 | unicode: Boolean, | |
| 108 | explicit: Boolean, | |
| 109 | start: Text.Offset, | |
| 110 | text: CharSequence, | |
| 111 | caret: Int, | |
| 112 | context: Completion.Language_Context): Option[Completion.Result] = | |
| 113 |   {
 | |
| 114 | completion.complete(history, unicode, explicit, start, text, caret, context) | |
| 115 | } | |
| 116 | ||
| 58695 | 117 | |
| 66717 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 118 | /* build */ | 
| 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 119 | |
| 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 120 | def + (header: Document.Node.Header): Outer_Syntax = | 
| 
67dbf5cdc056
more informative loaded_theories: dependencies and syntax;
 wenzelm parents: 
65384diff
changeset | 121 | add_keywords(header.keywords).add_abbrevs(header.abbrevs) | 
| 59073 | 122 | |
| 63584 
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
 wenzelm parents: 
63579diff
changeset | 123 | def ++ (other: Outer_Syntax): Outer_Syntax = | 
| 59073 | 124 | if (this eq other) this | 
| 66776 | 125 | else if (this eq Outer_Syntax.empty) other | 
| 59073 | 126 |     else {
 | 
| 63865 | 127 | val keywords1 = keywords ++ other.keywords | 
| 63867 | 128 | val rev_abbrevs1 = Library.merge(rev_abbrevs, other.rev_abbrevs) | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 129 | if ((keywords eq keywords1) && (rev_abbrevs eq rev_abbrevs1)) this | 
| 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 130 | else new Outer_Syntax(keywords1, rev_abbrevs1, language_context, has_tokens) | 
| 59073 | 131 | } | 
| 132 | ||
| 133 | ||
| 59735 | 134 | /* load commands */ | 
| 58900 | 135 | |
| 72748 
04d5f6d769a7
more flexible syntax for theory load commands via Isabelle/Scala;
 wenzelm parents: 
72740diff
changeset | 136 | def load_command(name: String): Option[String] = | 
| 72740 | 137 | keywords.load_commands.get(name) | 
| 138 | ||
| 139 | def has_load_commands(text: String): Boolean = | |
| 140 |     keywords.load_commands.exists({ case (cmd, _) => text.containsSlice(cmd) })
 | |
| 58900 | 141 | |
| 142 | ||
| 58706 | 143 | /* language context */ | 
| 34166 | 144 | |
| 58706 | 145 | def set_language_context(context: Completion.Language_Context): Outer_Syntax = | 
| 67004 
af72fa58f71b
clarified lazy Completion within Outer_Syntax: measurable speedup of Sessions.deps;
 wenzelm parents: 
66984diff
changeset | 146 | new Outer_Syntax(keywords, rev_abbrevs, context, has_tokens) | 
| 58706 | 147 | |
| 148 | def no_tokens: Outer_Syntax = | |
| 46969 | 149 |   {
 | 
| 73120 
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
 wenzelm parents: 
73115diff
changeset | 150 | require(keywords.is_empty, "bad syntax keywords") | 
| 58706 | 151 | new Outer_Syntax( | 
| 63867 | 152 | rev_abbrevs = rev_abbrevs, | 
| 58706 | 153 | language_context = language_context, | 
| 154 | has_tokens = false) | |
| 46969 | 155 | } | 
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 156 | |
| 58706 | 157 | |
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 158 | |
| 58706 | 159 | /** parsing **/ | 
| 34166 | 160 | |
| 58706 | 161 | /* command spans */ | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 162 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 163 | def parse_spans(toks: List[Token]): List[Command_Span.Span] = | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 164 |   {
 | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 165 | val result = new mutable.ListBuffer[Command_Span.Span] | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 166 | val content = new mutable.ListBuffer[Token] | 
| 68729 
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
 wenzelm parents: 
67005diff
changeset | 167 | val ignored = new mutable.ListBuffer[Token] | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 168 | |
| 73340 | 169 | def ship(content: List[Token]): Unit = | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 170 |     {
 | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 171 | val kind = | 
| 72800 | 172 | if (content.forall(_.is_ignored)) Command_Span.Ignored_Span | 
| 173 | else if (content.exists(_.is_error)) Command_Span.Malformed_Span | |
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 174 | else | 
| 72800 | 175 |           content.find(_.is_command) match {
 | 
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 176 | case None => Command_Span.Malformed_Span | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 177 | case Some(cmd) => | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 178 | val name = cmd.source | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 179 | val offset = | 
| 73359 | 180 |                 content.takeWhile(_ != cmd).foldLeft(0) {
 | 
| 181 | case (i, tok) => i + Symbol.length(tok.source) | |
| 182 | } | |
| 64616 | 183 | val end_offset = offset + Symbol.length(name) | 
| 72800 | 184 | val range = Text.Range(offset, end_offset) + 1 | 
| 73115 
a8e5d7c9a834
discontinued odd absolute position (amending 85bcdd05c6d0, 1975f397eabb): it violates translation invariance of commands and may lead to redundant re-checking of PIDE document;
 wenzelm parents: 
72800diff
changeset | 185 | Command_Span.Command_Span(name, Position.Range(range)) | 
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 186 | } | 
| 72800 | 187 | result += Command_Span.Span(kind, content) | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 188 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 189 | |
| 73340 | 190 | def flush(): Unit = | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 191 |     {
 | 
| 73367 | 192 |       if (content.nonEmpty) { ship(content.toList); content.clear() }
 | 
| 193 |       if (ignored.nonEmpty) { ship(ignored.toList); ignored.clear() }
 | |
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 194 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 195 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 196 |     for (tok <- toks) {
 | 
| 68729 
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
 wenzelm parents: 
67005diff
changeset | 197 | if (tok.is_ignored) ignored += tok | 
| 63441 | 198 | else if (keywords.is_before_command(tok) || | 
| 199 | tok.is_command && | |
| 71601 | 200 | (!content.exists(keywords.is_before_command) || content.exists(_.is_command))) | 
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 201 |       { flush(); content += tok }
 | 
| 73367 | 202 |       else { content ++= ignored; ignored.clear(); content += tok }
 | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 203 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 204 | flush() | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 205 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 206 | result.toList | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 207 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 208 | |
| 57906 | 209 | def parse_spans(input: CharSequence): List[Command_Span.Span] = | 
| 59083 | 210 | parse_spans(Token.explode(keywords, input)) | 
| 34166 | 211 | } |