| author | wenzelm | 
| Tue, 20 Dec 2016 10:06:18 +0100 | |
| changeset 64614 | 88211daacf93 | 
| parent 63867 | fb46c031c841 | 
| child 64616 | dc3ec40fe41b | 
| permissions | -rw-r--r-- | 
| 34166 | 1 | /* Title: Pure/Isar/outer_syntax.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 4 | Isabelle/Isar outer syntax. | |
| 5 | */ | |
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 9 | ||
| 43411 
0206466ee473
some support for partial scans with explicit context;
 wenzelm parents: 
40533diff
changeset | 10 | import scala.collection.mutable | 
| 34166 | 11 | |
| 12 | ||
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 13 | object Outer_Syntax | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 14 | {
 | 
| 58706 | 15 | /* syntax */ | 
| 16 | ||
| 17 | val empty: Outer_Syntax = new Outer_Syntax() | |
| 18 | ||
| 19 | def init(): Outer_Syntax = new Outer_Syntax(completion = Completion.init()) | |
| 20 | ||
| 21 | ||
| 22 | /* string literals */ | |
| 23 | ||
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 24 | def quote_string(str: String): String = | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 25 |   {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 26 | val result = new StringBuilder(str.length + 10) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 27 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 28 |     for (s <- Symbol.iterator(str)) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 29 |       if (s.length == 1) {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 30 | val c = s(0) | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 31 |         if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') {
 | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 32 | result += '\\' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 33 | if (c < 10) result += '0' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 34 | if (c < 100) result += '0' | 
| 60215 | 35 | result ++= c.asInstanceOf[Int].toString | 
| 43774 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 36 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 37 | else result += c | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 38 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 39 | else result ++= s | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 40 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 41 | result += '"' | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 42 | result.toString | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 43 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 44 | } | 
| 
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
 wenzelm parents: 
43695diff
changeset | 45 | |
| 46712 | 46 | final class Outer_Syntax private( | 
| 58900 | 47 | val keywords: Keyword.Keywords = Keyword.Keywords.empty, | 
| 53280 
c63a016805b9
explicit indication of outer syntax with no tokens;
 wenzelm parents: 
52439diff
changeset | 48 | val completion: Completion = Completion.empty, | 
| 63867 | 49 | val rev_abbrevs: Thy_Header.Abbrevs = Nil, | 
| 55749 | 50 | val language_context: Completion.Language_Context = Completion.Language_Context.outer, | 
| 63584 
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
 wenzelm parents: 
63579diff
changeset | 51 | val has_tokens: Boolean = true) | 
| 34166 | 52 | {
 | 
| 58706 | 53 | /** syntax content **/ | 
| 54 | ||
| 58900 | 55 | override def toString: String = keywords.toString | 
| 56393 
22f533e6a049
more abstract Prover.Syntax, as proposed by Carst Tankink;
 wenzelm parents: 
56314diff
changeset | 56 | |
| 58695 | 57 | |
| 63867 | 58 | /* keywords */ | 
| 58695 | 59 | |
| 63579 | 60 | def + (name: String, kind: String = "", tags: List[String] = Nil): Outer_Syntax = | 
| 53280 
c63a016805b9
explicit indication of outer syntax with no tokens;
 wenzelm parents: 
52439diff
changeset | 61 |   {
 | 
| 63429 | 62 | val keywords1 = keywords + (name, kind, tags) | 
| 53280 
c63a016805b9
explicit indication of outer syntax with no tokens;
 wenzelm parents: 
52439diff
changeset | 63 | val completion1 = | 
| 63587 
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
 wenzelm parents: 
63584diff
changeset | 64 | completion.add_keyword(name). | 
| 
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
 wenzelm parents: 
63584diff
changeset | 65 | add_abbrevs( | 
| 
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
 wenzelm parents: 
63584diff
changeset | 66 | (if (Keyword.theory_block.contains(kind)) List((name, name + "\nbegin\n\u0007\nend")) | 
| 
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
 wenzelm parents: 
63584diff
changeset | 67 | else Nil) ::: | 
| 
881e8e2cfec2
implicit keyword completion only for actual words (amending 73939a9b70a3);
 wenzelm parents: 
63584diff
changeset | 68 | (if (Completion.Word_Parsers.is_word(name)) List((name, name)) else Nil)) | 
| 63867 | 69 | new Outer_Syntax(keywords1, completion1, rev_abbrevs, language_context, true) | 
| 53280 
c63a016805b9
explicit indication of outer syntax with no tokens;
 wenzelm parents: 
52439diff
changeset | 70 | } | 
| 48706 | 71 | |
| 48873 | 72 | def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax = | 
| 73 |     (this /: keywords) {
 | |
| 63579 | 74 | case (syntax, (name, ((kind, tags), _))) => | 
| 75 | syntax + (Symbol.decode(name), kind, tags) + (Symbol.encode(name), kind, tags) | |
| 76 | } | |
| 77 | ||
| 63867 | 78 | |
| 79 | /* abbrevs */ | |
| 80 | ||
| 81 | def abbrevs: Thy_Header.Abbrevs = rev_abbrevs.reverse | |
| 82 | ||
| 83 | def add_abbrevs(new_abbrevs: Thy_Header.Abbrevs): Outer_Syntax = | |
| 84 | if (new_abbrevs.isEmpty) this | |
| 63579 | 85 |     else {
 | 
| 86 | val completion1 = | |
| 87 | completion.add_abbrevs( | |
| 63867 | 88 |           (for ((a, b) <- new_abbrevs) yield {
 | 
| 63579 | 89 | val a1 = Symbol.decode(a) | 
| 90 | val a2 = Symbol.encode(a) | |
| 91 | val b1 = Symbol.decode(b) | |
| 92 | List((a1, b1), (a2, b1)) | |
| 93 | }).flatten) | |
| 63867 | 94 | val rev_abbrevs1 = Library.distinct(new_abbrevs) reverse_::: rev_abbrevs | 
| 95 | new Outer_Syntax(keywords, completion1, rev_abbrevs1, language_context, has_tokens) | |
| 46940 | 96 | } | 
| 34166 | 97 | |
| 58695 | 98 | |
| 59073 | 99 | /* merge */ | 
| 100 | ||
| 63584 
68751fe1c036
tuned signature -- prover-independence is presently theoretical;
 wenzelm parents: 
63579diff
changeset | 101 | def ++ (other: Outer_Syntax): Outer_Syntax = | 
| 59073 | 102 | if (this eq other) this | 
| 103 |     else {
 | |
| 63865 | 104 | val keywords1 = keywords ++ other.keywords | 
| 105 | val completion1 = completion ++ other.completion | |
| 63867 | 106 | val rev_abbrevs1 = Library.merge(rev_abbrevs, other.rev_abbrevs) | 
| 59077 
7e0d3da6e6d8
node-specific syntax, with base_syntax as default;
 wenzelm parents: 
59073diff
changeset | 107 | if ((keywords eq keywords1) && (completion eq completion1)) this | 
| 63867 | 108 | else new Outer_Syntax(keywords1, completion1, rev_abbrevs1, language_context, has_tokens) | 
| 59073 | 109 | } | 
| 110 | ||
| 111 | ||
| 59735 | 112 | /* load commands */ | 
| 58900 | 113 | |
| 63441 | 114 | def load_command(name: String): Option[List[String]] = keywords.load_commands.get(name) | 
| 58900 | 115 | def load_commands_in(text: String): Boolean = keywords.load_commands_in(text) | 
| 116 | ||
| 117 | ||
| 58706 | 118 | /* language context */ | 
| 34166 | 119 | |
| 58706 | 120 | def set_language_context(context: Completion.Language_Context): Outer_Syntax = | 
| 63867 | 121 | new Outer_Syntax(keywords, completion, rev_abbrevs, context, has_tokens) | 
| 58706 | 122 | |
| 123 | def no_tokens: Outer_Syntax = | |
| 46969 | 124 |   {
 | 
| 58900 | 125 | require(keywords.is_empty) | 
| 58706 | 126 | new Outer_Syntax( | 
| 127 | completion = completion, | |
| 63867 | 128 | rev_abbrevs = rev_abbrevs, | 
| 58706 | 129 | language_context = language_context, | 
| 130 | has_tokens = false) | |
| 46969 | 131 | } | 
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 132 | |
| 58706 | 133 | |
| 40454 
2516ea25a54b
some support for nested source structure, based on section headings;
 wenzelm parents: 
38471diff
changeset | 134 | |
| 58706 | 135 | /** parsing **/ | 
| 34166 | 136 | |
| 58706 | 137 | /* command spans */ | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 138 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 139 | def parse_spans(toks: List[Token]): List[Command_Span.Span] = | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 140 |   {
 | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 141 | val result = new mutable.ListBuffer[Command_Span.Span] | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 142 | val content = new mutable.ListBuffer[Token] | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 143 | val improper = new mutable.ListBuffer[Token] | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 144 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 145 | def ship(span: List[Token]) | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 146 |     {
 | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 147 | val kind = | 
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 148 | if (span.forall(_.is_improper)) Command_Span.Ignored_Span | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 149 | else if (span.exists(_.is_error)) Command_Span.Malformed_Span | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 150 | else | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 151 |           span.find(_.is_command) match {
 | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 152 | case None => Command_Span.Malformed_Span | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 153 | case Some(cmd) => | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 154 | val name = cmd.source | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 155 | val offset = | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 156 |                 (0 /: span.takeWhile(_ != cmd)) {
 | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 157 | case (i, tok) => i + Symbol.iterator(tok.source).length } | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 158 | val end_offset = offset + Symbol.iterator(name).length | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 159 | val pos = Position.Range(Text.Range(offset, end_offset) + 1) | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 160 | Command_Span.Command_Span(name, pos) | 
| 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 161 | } | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 162 | result += Command_Span.Span(kind, span) | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 163 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 164 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 165 | def flush() | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 166 |     {
 | 
| 59319 | 167 |       if (content.nonEmpty) { ship(content.toList); content.clear }
 | 
| 168 |       if (improper.nonEmpty) { ship(improper.toList); improper.clear }
 | |
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 169 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 170 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 171 |     for (tok <- toks) {
 | 
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 172 | if (tok.is_improper) improper += tok | 
| 63441 | 173 | else if (keywords.is_before_command(tok) || | 
| 174 | tok.is_command && | |
| 175 | (!content.exists(keywords.is_before_command(_)) || content.exists(_.is_command))) | |
| 59924 
801b979ec0c2
more general notion of command span: command keyword not necessarily at start;
 wenzelm parents: 
59735diff
changeset | 176 |       { flush(); content += tok }
 | 
| 57905 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 177 |       else { content ++= improper; improper.clear; content += tok }
 | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 178 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 179 | flush() | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 180 | |
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 181 | result.toList | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 182 | } | 
| 
c0c5652e796e
separate module Command_Span: mostly syntactic representation;
 wenzelm parents: 
57901diff
changeset | 183 | |
| 57906 | 184 | def parse_spans(input: CharSequence): List[Command_Span.Span] = | 
| 59083 | 185 | parse_spans(Token.explode(keywords, input)) | 
| 34166 | 186 | } |