author | wenzelm |
Thu, 07 Jul 2011 22:04:30 +0200 | |
changeset 43697 | 77ce24aa1770 |
parent 43662 | e3175ec00311 |
child 43715 | 518e44a0ee15 |
permissions | -rw-r--r-- |
34268 | 1 |
/* Title: Pure/Thy/thy_syntax.scala |
2 |
Author: Makarius |
|
3 |
||
38374 | 4 |
Superficial theory syntax: tokens and spans. |
34268 | 5 |
*/ |
6 |
||
7 |
package isabelle |
|
8 |
||
9 |
||
38239
89a4d1028fb3
parse_spans: somewhat faster low-level implementation;
wenzelm
parents:
36956
diff
changeset
|
10 |
import scala.collection.mutable |
38374 | 11 |
import scala.annotation.tailrec |
38239
89a4d1028fb3
parse_spans: somewhat faster low-level implementation;
wenzelm
parents:
36956
diff
changeset
|
12 |
|
89a4d1028fb3
parse_spans: somewhat faster low-level implementation;
wenzelm
parents:
36956
diff
changeset
|
13 |
|
34303 | 14 |
object Thy_Syntax |
34268 | 15 |
{ |
40454
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
16 |
/** nested structure **/ |
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
17 |
|
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
18 |
object Structure
{
  /* Node of the nested outline; every node reports a length. */
  sealed abstract class Entry { def length: Int }

  /* Named group of entries; its length is the sum of the lengths of its body. */
  case class Block(name: String, body: List[Entry]) extends Entry
  {
    val length: Int = body.foldLeft(0)(_ + _.length)
  }

  /* Leaf entry wrapping a single command; its length is the command's length. */
  case class Atom(command: Command) extends Entry
  {
    def length: Int = command.length
  }

  /* Parse text into a tree of blocks, nested according to heading levels.

     The text is scanned with the given syntax and split into command spans.
     A command for which syntax.heading_level yields Some(i) first closes all
     open blocks of level >= i and then opens a new block named by the command
     source; every command (headings included) is appended as an Atom to the
     innermost open block.  The result is a Block named root_name. */
  def parse(syntax: Outer_Syntax, root_name: String, text: CharSequence): Entry =
  {
    /* stack operations */

    def buffer(): mutable.ListBuffer[Entry] = new mutable.ListBuffer[Entry]

    // innermost open block first; the root entry (level 0) is never popped
    var stack: List[(Int, String, mutable.ListBuffer[Entry])] = List((0, root_name, buffer()))

    // Pop open blocks while the predicate holds for the level of the top block,
    // appending each popped block to its parent.  At least two entries are
    // required by the pattern, so the bottom entry always stays on the stack.
    @tailrec def close(level: Int => Boolean): Unit =
    {
      stack match {
        case (lev, name, body) :: (_, _, body2) :: _ if level(lev) =>
          body2 += Block(name, body.toList)
          stack = stack.tail
          close(level)
        case _ =>
      }
    }

    // Close everything that is still open and wrap the remaining bottom entry.
    def result(): Entry =
    {
      close(_ => true)
      val (_, name, body) = stack.head
      Block(name, body.toList)
    }

    // Register one command: a heading opens a fresh block (after closing blocks
    // of the same or higher level number); the command itself always lands in
    // the block that is innermost afterwards.
    def add(command: Command): Unit =
    {
      syntax.heading_level(command) match {
        case Some(i) =>
          close(_ >= i)
          stack = (i, command.source, buffer()) :: stack
        case None =>
      }
      stack.head._3 += Atom(command)
    }


    /* result structure */

    val spans = parse_spans(syntax.scan(text))
    spans.foreach(span => add(Command.span(span)))
    result()
  }
}
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
74 |
|
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
75 |
|
2516ea25a54b
some support for nested source structure, based on section headings;
wenzelm
parents:
38878
diff
changeset
|
76 |
|
38374 | 77 |
/** parse spans **/ |
78 |
||
38373 | 79 |
/* Partition a token list into consecutive spans.

   A command token starts a new span.  Ignored tokens are held back: they are
   merged into the current span if another non-command token follows, otherwise
   they are emitted as a span of their own.  Concatenating the result yields
   the original token list. */
def parse_spans(toks: List[Token]): List[List[Token]] =
{
  val spans = new mutable.ListBuffer[List[Token]]
  val current = new mutable.ListBuffer[Token]
  val pending = new mutable.ListBuffer[Token]   // ignored tokens not yet assigned to a span

  // emit the buffer content as one span (unless empty) and reset the buffer
  def emit(buf: mutable.ListBuffer[Token]): Unit =
    if (buf.nonEmpty) {
      spans += buf.toList
      buf.clear()
    }

  toks.foreach { tok =>
    if (tok.is_command) {
      emit(current)
      emit(pending)
      current += tok
    }
    else if (tok.is_ignored) {
      pending += tok
    }
    else {
      current ++= pending
      pending.clear()
      current += tok
    }
  }

  emit(current)
  emit(pending)
  spans.toList
}
38374 | 97 |
|
98 |
||
99 |
||
100 |
/** text edits **/ |
|
101 |
||
43662
e3175ec00311
Document.no_id/new_id as in ML (new_id *could* be session-specific but it isn't right now);
wenzelm
parents:
43660
diff
changeset
|
102 |
/* Apply text edits to the nodes of a previous document version.

   Returns the resulting command edits per node together with a new
   Document.Version (with a fresh id) that holds the updated nodes. */
def text_edits(syntax: Outer_Syntax, previous: Document.Version,
    edits: List[Document.Edit_Text]): (List[Document.Edit_Command], Document.Version) =
{
  /* phase 1: edit individual command source */

  // Each edit is applied to the first command that can take it, or inserted
  // directly after the command that ends at the edit position; the affected
  // text is stored as an unparsed command.
  @tailrec def edit_text(eds: List[Text.Edit], commands: Linear_Set[Command])
      : Linear_Set[Command] =
  {
    eds match {
      case Nil => commands

      case edit :: more =>
        val target =
          Document.Node.command_starts(commands.iterator).find {
            case (cmd, start) =>
              edit.can_edit(cmd.source, start) ||
                (edit.is_insert && edit.start == start + cmd.length)
          }
        target match {
          case Some((cmd, start)) =>
            if (edit.can_edit(cmd.source, start)) {
              // replace the command by its edited text; a leftover part of the
              // edit (if any) is processed next
              val (remaining, text) = edit.edit(cmd.source, start)
              val updated = commands.insert_after(Some(cmd), Command.unparsed(text)) - cmd
              edit_text(remaining.toList ::: more, updated)
            }
            else {
              // pure insertion right behind the end of cmd
              edit_text(more, commands.insert_after(Some(cmd), Command.unparsed(edit.text)))
            }

          case None =>
            // no command found: only an insertion at offset 0 is acceptable
            require(edit.is_insert && edit.start == 0)
            edit_text(more, commands.insert_after(None, Command.unparsed(edit.text)))
        }
    }
  }


  /* phase 2: recover command spans */

  // Repeatedly pick the first unparsed command, widen the range in both
  // directions past commands without newlines up to the next proper command,
  // re-scan the combined source, and replace the range by fresh commands.
  // Unchanged spans at either end of the range keep their existing command.
  @tailrec def recover_spans(commands: Linear_Set[Command]): Linear_Set[Command] =
  {
    commands.iterator.find(_.is_unparsed) match {
      case None => commands

      case Some(unparsed) =>
        val first =
          commands.reverse_iterator(unparsed)
            .dropWhile(_.newlines == 0).find(_.is_command).getOrElse(commands.head)
        val last =
          commands.iterator(unparsed)
            .dropWhile(_.newlines == 0).find(_.is_command).getOrElse(commands.last)
        val range =
          commands.iterator(first).takeWhile(_ != last).toList ::: List(last)

        val sources = for (cmd <- range; tok <- cmd.span) yield tok.source
        val spans0 = parse_spans(syntax.scan(sources.mkString))

        val (before_edit, spans1) =
          spans0 match {
            case leading :: others if first.is_command && first.span == leading =>
              (Some(first), others)
            case _ => (commands.prev(first), spans0)
          }

        val (after_edit, spans2) =
          if (spans1.nonEmpty && last.is_command && last.span == spans1.last)
            (Some(last), spans1.init)
          else (commands.next(last), spans1)

        val inserted = spans2.map(span => new Command(Document.new_id(), span))
        val replaced =
          commands.delete_between(before_edit, after_edit).append_after(before_edit, inserted)
        recover_spans(replaced)
    }
  }


  /* resulting document edits */

  {
    val doc_edits = new mutable.ListBuffer[Document.Edit_Command]
    var nodes = previous.nodes

    edits.foreach {
      // deleted node
      case (name, None) =>
        doc_edits += (name -> None)
        nodes -= name

      // edited node
      case (name, Some(node_edits)) =>
        val node = nodes(name)
        val commands0 = node.commands
        val commands1 = edit_text(node_edits, commands0)
        val commands2 = recover_spans(commands1)   // FIXME somewhat slow

        val removed = commands0.iterator.filterNot(commands2.contains(_)).toList
        val inserted = commands2.iterator.filterNot(commands0.contains(_)).toList

        // removals come first (in reverse order), then insertions; each edit
        // refers to the predecessor of the affected command
        val cmd_edits =
          removed.reverse.map(cmd => (commands0.prev(cmd), None)) :::
          inserted.map(cmd => (commands2.prev(cmd), Some(cmd)))

        doc_edits += (name -> Some(cmd_edits))
        nodes += (name -> new Document.Node(node.header, commands2))
    }

    (doc_edits.toList, new Document.Version(Document.new_id(), nodes))
  }
}
|
34268 | 202 |
} |