author | wenzelm |
Fri, 31 Oct 2014 21:48:40 +0100 | |
changeset 58853 | f8715e7c1be6 |
parent 58753 | 960bf499ca5d |
child 58868 | c5e1cce7ace3 |
permissions | -rw-r--r-- |
34166 | 1 |
/* Title: Pure/Isar/outer_syntax.scala |
2 |
Author: Makarius |
|
3 |
||
4 |
Isabelle/Isar outer syntax. |
|
5 |
*/ |
|
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
10 |
import scala.util.parsing.input.{Reader, CharSequenceReader} |
|
43411
0206466ee473
some support for partial scans with explicit context;
wenzelm
parents:
40533
diff
changeset
|
11 |
import scala.collection.mutable |
58706 | 12 |
import scala.annotation.tailrec |
34166 | 13 |
|
14 |
||
43774
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
15 |
/** Companion object: initial syntax values, outer-syntax string quoting,
    and auxiliary structures for line-oriented and overall document structure. */
object Outer_Syntax
{
  /* syntax */

  // completely empty syntax: no keywords, no completion
  val empty: Outer_Syntax = new Outer_Syntax()

  // initial syntax: still no keywords, but with the default completion
  def init(): Outer_Syntax = new Outer_Syntax(completion = Completion.init())


  /* string literals */

  /** Quote `str` as an outer-syntax string literal: wrap in double quotes and
      escape backslash, double-quote, and control characters (except the YXML
      marker characters X and Y) as backslash plus a 3-digit decimal code.
      Multi-character symbols from Symbol.iterator are copied verbatim.
      (Conceptually different from Token.unparse.) */
  def quote_string(str: String): String =
  {
    val result = new StringBuilder(str.length + 10)
    result += '"'
    for (s <- Symbol.iterator(str)) {
      if (s.length == 1) {
        val c = s(0)
        if (c < 32 && c != YXML.X && c != YXML.Y || c == '\\' || c == '"') {
          result += '\\'
          // left-pad the decimal character code to exactly 3 digits
          if (c < 10) result += '0'
          if (c < 100) result += '0'
          // fix: idiomatic Char-to-Int conversion instead of asInstanceOf[Int]
          result ++= c.toInt.toString
        }
        else result += c
      }
      else result ++= s
    }
    result += '"'
    result.toString
  }


  /* line-oriented structure */

  object Line_Structure
  {
    val init = Line_Structure()
  }

  /** Summary of one line of tokens, as computed by Outer_Syntax.line_structure:
      `improper` -- all tokens are improper (whitespace/comment-like);
      `command` -- some token is a command;
      the three depth fields track nesting of proof/theory structure. */
  sealed case class Line_Structure(
    improper: Boolean = true,
    command: Boolean = false,
    depth: Int = 0,
    span_depth: Int = 0,
    after_span_depth: Int = 0)


  /* overall document structure */

  sealed abstract class Document { def length: Int }

  // section node: heading command name/source text plus nested body
  case class Document_Block(name: String, text: String, body: List[Document]) extends Document
  {
    // total length is the sum of the lengths of the nested documents
    val length: Int = (0 /: body)(_ + _.length)
  }

  // leaf node: a single command
  case class Document_Atom(command: Command) extends Document
  {
    def length: Int = command.length
  }
}
6dfdb70496fe
added Outer_Syntax.quote_string, which is conceptually a bit different from Token.unparse;
wenzelm
parents:
43695
diff
changeset
|
75 |
|
46712 | 76 |
/** Isabelle/Isar outer syntax: keyword table, token language, command spans,
    and document structure.  Instances are immutable; the `+` operators return
    extended copies.  Constructed only via Outer_Syntax.empty/init (private
    constructor). */
final class Outer_Syntax private(
  keywords: Map[String, (String, List[String])] = Map.empty,
  lexicon: Scan.Lexicon = Scan.Lexicon.empty,
  val completion: Completion = Completion.empty,
  val language_context: Completion.Language_Context = Completion.Language_Context.outer,
  val has_tokens: Boolean = true) extends Prover.Syntax
{
  /** syntax content **/

  // render the keyword table in theory-header "keywords ..." notation
  override def toString: String =
    (for ((name, (kind, files)) <- keywords) yield {
      if (kind == Keyword.MINOR) quote(name)
      else
        quote(name) + " :: " + quote(kind) +
        (if (files.isEmpty) "" else " (" + commas_quote(files) + ")")
    }).toList.sorted.mkString("keywords\n  ", " and\n  ", "")


  /* keyword kind */

  // kind plus associated file extensions, if the name is a known keyword
  def keyword_kind_files(name: String): Option[(String, List[String])] = keywords.get(name)
  def keyword_kind(name: String): Option[String] = keyword_kind_files(name).map(_._1)

  // a known keyword of any kind other than MINOR counts as a command
  def is_command(name: String): Boolean =
    keyword_kind(name) match {
      case Some(kind) => kind != Keyword.MINOR
      case None => false
    }

  // does this command token have a kind satisfying pred?
  // NB: the .get is safe -- is_command(token.source) guarantees keyword_kind is defined
  def command_kind(token: Token, pred: String => Boolean): Boolean =
    token.is_command && is_command(token.source) &&
      pred(keyword_kind(token.source).get)


  /* load commands */

  // file extensions for commands of kind THY_LOAD (commands that load files)
  def load_command(name: String): Option[List[String]] =
    keywords.get(name) match {
      case Some((Keyword.THY_LOAD, exts)) => Some(exts)
      case _ => None
    }

  // all THY_LOAD commands with their file extensions
  val load_commands: List[(String, List[String])] =
    (for ((name, (Keyword.THY_LOAD, files)) <- keywords.iterator) yield (name, files)).toList

  // quick (substring-based) test whether text might contain a load command
  def load_commands_in(text: String): Boolean =
    load_commands.exists({ case (cmd, _) => text.containsSlice(cmd) })


  /* add keywords */

  /** Extend the syntax with one keyword.  `replace` controls completion:
      Some("") suppresses the completion entry, otherwise the given (or the
      keyword's own) name is offered. */
  def + (name: String, kind: (String, List[String]), replace: Option[String]): Outer_Syntax =
  {
    val keywords1 = keywords + (name -> kind)
    val lexicon1 = lexicon + name
    val completion1 =
      if (replace == Some("")) completion
      else completion + (name, replace getOrElse name)
    // result always has tokens, even if this syntax was created via no_tokens
    new Outer_Syntax(keywords1, lexicon1, completion1, language_context, true)
  }

  def + (name: String, kind: (String, List[String])): Outer_Syntax =
    this + (name, kind, Some(name))
  def + (name: String, kind: String): Outer_Syntax =
    this + (name, (kind, Nil), Some(name))
  def + (name: String, replace: Option[String]): Outer_Syntax =
    this + (name, (Keyword.MINOR, Nil), replace)
  def + (name: String): Outer_Syntax = this + (name, None)

  // fold a theory-header keyword declaration list into the syntax,
  // adding each name in both its decoded and encoded symbol form
  def add_keywords(keywords: Thy_Header.Keywords): Outer_Syntax =
    (this /: keywords) {
      case (syntax, (name, Some((kind, _)), replace)) =>
        syntax +
          (Symbol.decode(name), kind, replace) +
          (Symbol.encode(name), kind, replace)
      case (syntax, (name, None, replace)) =>
        syntax +
          (Symbol.decode(name), replace) +
          (Symbol.encode(name), replace)
    }


  /* language context */

  def set_language_context(context: Completion.Language_Context): Outer_Syntax =
    new Outer_Syntax(keywords, lexicon, completion, context, has_tokens)

  // variant of this syntax that produces no tokens at all;
  // only legal while no keywords have been added yet
  def no_tokens: Outer_Syntax =
  {
    require(keywords.isEmpty && lexicon.isEmpty)
    new Outer_Syntax(
      completion = completion,
      language_context = language_context,
      has_tokens = false)
  }



  /** parsing **/

  /* line-oriented structure */

  /** Compute the structure of one line of tokens, threading the structure of
      the previous line (`struct`) through.  Depth bookkeeping: theory commands
      reset to 0; goal commands open nested levels; qed commands close them. */
  def line_structure(tokens: List[Token], struct: Outer_Syntax.Line_Structure)
    : Outer_Syntax.Line_Structure =
  {
    val improper1 = tokens.forall(_.is_improper)
    val command1 = tokens.exists(_.is_command)

    // depth at the start of this line
    val depth1 =
      if (tokens.exists(tok => command_kind(tok, Keyword.theory))) 0
      else if (command1) struct.after_span_depth
      else struct.span_depth

    // fold the per-token depth transitions over the line;
    // x = depth within the span, y = depth after the span
    val (span_depth1, after_span_depth1) =
      ((struct.span_depth, struct.after_span_depth) /: tokens) {
        case ((x, y), tok) =>
          if (tok.is_command) {
            if (command_kind(tok, Keyword.theory_goal)) (2, 1)
            else if (command_kind(tok, Keyword.theory)) (1, 0)
            else if (command_kind(tok, Keyword.proof_goal) || tok.is_begin_block) (y + 2, y + 1)
            else if (command_kind(tok, Keyword.qed) || tok.is_end_block) (y + 1, y - 1)
            else if (command_kind(tok, Keyword.qed_global)) (1, 0)
            else (x, y)
          }
          else (x, y)
      }

    Outer_Syntax.Line_Structure(improper1, command1, depth1, span_depth1, after_span_depth1)
  }


  /* token language */

  /** Tokenize complete input; an unparseable remainder is a hard error
      (Token.Parsers is expected to consume arbitrary input). */
  def scan(input: CharSequence): List[Token] =
  {
    val in: Reader[Char] = new CharSequenceReader(input)
    Token.Parsers.parseAll(
        Token.Parsers.rep(Token.Parsers.token(lexicon, is_command)), in) match {
      case Token.Parsers.Success(tokens, _) => tokens
      case _ => error("Unexpected failure of tokenizing input:\n" + input.toString)
    }
  }

  /** Tokenize a single line, threading the scanner context (e.g. for string
      or comment literals that continue across lines) through; returns the
      tokens together with the resulting context. */
  def scan_line(input: CharSequence, context: Scan.Line_Context): (List[Token], Scan.Line_Context) =
  {
    var in: Reader[Char] = new CharSequenceReader(input)
    val toks = new mutable.ListBuffer[Token]
    var ctxt = context
    while (!in.atEnd) {
      Token.Parsers.parse(Token.Parsers.token_line(lexicon, is_command, ctxt), in) match {
        case Token.Parsers.Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
        case Token.Parsers.NoSuccess(_, rest) =>
          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
      }
    }
    (toks.toList, ctxt)
  }


  /* command spans */

  /** Partition a token stream into command spans: each span is a command
      token plus its trailing arguments; improper tokens between spans are
      shipped separately as ignored spans. */
  def parse_spans(toks: List[Token]): List[Command_Span.Span] =
  {
    val result = new mutable.ListBuffer[Command_Span.Span]
    val content = new mutable.ListBuffer[Token]
    val improper = new mutable.ListBuffer[Token]

    // classify and emit one accumulated span
    def ship(span: List[Token])
    {
      val kind =
        if (!span.isEmpty && span.head.is_command && !span.exists(_.is_error)) {
          val name = span.head.source
          // position range of the command name in symbol (not char) units, 1-based
          val pos = Position.Range(Text.Range(0, Symbol.iterator(name).length) + 1)
          Command_Span.Command_Span(name, pos)
        }
        else if (span.forall(_.is_improper)) Command_Span.Ignored_Span
        else Command_Span.Malformed_Span
      result += Command_Span.Span(kind, span)
    }

    // emit pending content, then pending improper tokens, in that order
    def flush()
    {
      if (!content.isEmpty) { ship(content.toList); content.clear }
      if (!improper.isEmpty) { ship(improper.toList); improper.clear }
    }

    for (tok <- toks) {
      if (tok.is_command) { flush(); content += tok }
      else if (tok.is_improper) improper += tok
      // a proper non-command token attaches any buffered improper tokens
      // to the current span before itself
      else { content ++= improper; improper.clear; content += tok }
    }
    flush()

    result.toList
  }

  def parse_spans(input: CharSequence): List[Command_Span.Span] =
    parse_spans(scan(input))


  /* overall document structure */

  /** Section level of a heading command: 0 for "header", 1-4 for the
      theory/proof heading kinds, 5 for any other theory command, None for
      commands that do not start a section. */
  def heading_level(command: Command): Option[Int] =
  {
    keyword_kind(command.name) match {
      case _ if command.name == "header" => Some(0)
      case Some(Keyword.THY_HEADING1) => Some(1)
      case Some(Keyword.THY_HEADING2) | Some(Keyword.PRF_HEADING2) => Some(2)
      case Some(Keyword.THY_HEADING3) | Some(Keyword.PRF_HEADING3) => Some(3)
      case Some(Keyword.THY_HEADING4) | Some(Keyword.PRF_HEADING4) => Some(4)
      case Some(kind) if Keyword.theory(kind) => Some(5)
      case _ => None
    }
  }

  /** Parse text into a tree of nested sections, driven by heading_level.
      Uses an explicit stack of (level, heading command, accumulated body). */
  def parse_document(node_name: Document.Node.Name, text: CharSequence):
    List[Outer_Syntax.Document] =
  {
    /* stack operations */

    def buffer(): mutable.ListBuffer[Outer_Syntax.Document] =
      new mutable.ListBuffer[Outer_Syntax.Document]

    // bottom element is a sentinel at level 0 with an empty command
    var stack: List[(Int, Command, mutable.ListBuffer[Outer_Syntax.Document])] =
      List((0, Command.empty, buffer()))

    // pop stack entries whose level satisfies the predicate, folding each
    // finished section into its parent's body as a Document_Block
    @tailrec def close(level: Int => Boolean)
    {
      stack match {
        case (lev, command, body) :: (_, _, body2) :: rest if level(lev) =>
          body2 += Outer_Syntax.Document_Block(command.name, command.source, body.toList)
          stack = stack.tail
          close(level)
        case _ =>
      }
    }

    def result(): List[Outer_Syntax.Document] =
    {
      close(_ => true)
      stack.head._3.toList
    }

    // add one command: headings close deeper sections and open a new one;
    // every command becomes an atom in the current section body
    def add(command: Command)
    {
      heading_level(command) match {
        case Some(i) =>
          close(_ > i)
          stack = (i + 1, command, buffer()) :: stack
        case None =>
      }
      stack.head._3 += Outer_Syntax.Document_Atom(command)
    }


    /* result structure */

    val spans = parse_spans(text)
    spans.foreach(span => add(Command(Document_ID.none, node_name, Nil, span)))
    result()
  }
}